Merge branch 'main' into litellm_invite_link_flow_2

Ishaan Jaff 2024-05-31 08:14:52 -07:00 committed by GitHub
commit f9862be049
100 changed files with 5297 additions and 883 deletions


@ -2,7 +2,7 @@ version: 4.3.4
jobs:
local_testing:
docker:
- image: circleci/python:3.9
- image: circleci/python:3.11.8
working_directory: ~/project
steps:
@ -43,7 +43,7 @@ jobs:
pip install "langfuse==2.27.1"
pip install "logfire==0.29.0"
pip install numpydoc
pip install traceloop-sdk==0.18.2
pip install traceloop-sdk==0.21.1
pip install openai
pip install prisma
pip install "httpx==0.24.1"
@ -61,6 +61,7 @@ jobs:
pip install prometheus-client==0.20.0
pip install "pydantic==2.7.1"
pip install "diskcache==5.6.1"
pip install "Pillow==10.3.0"
- save_cache:
paths:
- ./venv


@ -7,6 +7,5 @@ cohere
redis
anthropic
orjson
pydantic==1.10.14
pydantic==2.7.1
google-cloud-aiplatform==1.43.0
redisvl==0.0.7 # semantic caching


@ -1,3 +1,6 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Batching Completion()
LiteLLM allows you to:
* Send many completion calls to 1 model
@ -51,6 +54,9 @@ This makes parallel calls to the specified `models` and returns the first respon
Use this to reduce latency
<Tabs>
<TabItem value="sdk" label="SDK">
### Example Code
```python
import litellm
@ -68,8 +74,93 @@ response = batch_completion_models(
print(result)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
[how to set up proxy config](#example-setup)
Just pass a comma-separated string of model names and the flag `fastest_response=True`.
<Tabs>
<TabItem value="curl" label="curl">
```bash
curl -X POST 'http://localhost:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "gpt-4o, groq-llama", # 👈 Comma-separated models
"messages": [
{
"role": "user",
"content": "What's the weather like in Boston today?"
}
],
"stream": true,
"fastest_response": true # 👈 FLAG
}
'
```
</TabItem>
<TabItem value="openai" label="OpenAI SDK">
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
model="gpt-4o, groq-llama", # 👈 Comma-separated models
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
extra_body={"fastest_response": true} # 👈 FLAG
)
print(response)
```
</TabItem>
</Tabs>
---
### Example Setup:
```yaml
model_list:
- model_name: groq-llama
litellm_params:
model: groq/llama3-8b-8192
api_key: os.environ/GROQ_API_KEY
- model_name: gpt-4o
litellm_params:
model: gpt-4o
api_key: os.environ/OPENAI_API_KEY
```
```bash
litellm --config /path/to/config.yaml
# RUNNING on http://0.0.0.0:4000
```
</TabItem>
</Tabs>
### Output
Returns the first response
Returns the first response in OpenAI format. Cancels other LLM API calls.
```json
{
"object": "chat.completion",
@ -95,6 +186,7 @@ Returns the first response
}
```
## Send 1 completion call to many models: Return All Responses
This makes parallel calls to the specified models and returns all responses
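A minimal SDK sketch of this mode, using litellm's `batch_completion_models_all_responses` helper; the model names and prompt below are illustrative:
```python
import litellm

# send one prompt to several models and collect every response (not just the fastest)
responses = litellm.batch_completion_models_all_responses(
    models=["gpt-3.5-turbo", "claude-3-haiku-20240307"],  # illustrative model names
    messages=[{"role": "user", "content": "write a short poem"}],
    max_tokens=100,
)

for r in responses:
    print(r.choices[0].message.content)
```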


@ -178,23 +178,26 @@ curl -X GET --location 'http://0.0.0.0:4000/health/services?service=webhook' \
}
```
**API Spec for Webhook Event**
## **API Spec for Webhook Event**
- `spend` *float*: The current spend amount for the 'event_group'.
- `max_budget` *float*: The maximum allowed budget for the 'event_group'.
- `max_budget` *float or null*: The maximum allowed budget for the 'event_group'. null if not set.
- `token` *str*: A hashed value of the key, used for authentication or identification purposes.
- `user_id` *str or null*: The ID of the user associated with the event (optional).
- `customer_id` *str or null*: The ID of the customer associated with the event (optional).
- `internal_user_id` *str or null*: The ID of the internal user associated with the event (optional).
- `team_id` *str or null*: The ID of the team associated with the event (optional).
- `user_email` *str or null*: The email of the user associated with the event (optional).
- `user_email` *str or null*: The email of the internal user associated with the event (optional).
- `key_alias` *str or null*: An alias for the key associated with the event (optional).
- `projected_exceeded_date` *str or null*: The date when the budget is projected to be exceeded, returned when 'soft_budget' is set for key (optional).
- `projected_spend` *float or null*: The projected spend amount, returned when 'soft_budget' is set for key (optional).
- `event` *Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"]*: The type of event that triggered the webhook. Possible values are:
* "spend_tracked": Emitted whenver spend is tracked for a customer id.
* "budget_crossed": Indicates that the spend has exceeded the max budget.
* "threshold_crossed": Indicates that spend has crossed a threshold (currently sent when 85% and 95% of budget is reached).
* "projected_limit_exceeded": For "key" only - Indicates that the projected spend is expected to exceed the soft budget threshold.
- `event_group` *Literal["user", "key", "team", "proxy"]*: The group associated with the event. Possible values are:
* "user": The event is related to a specific user.
- `event_group` *Literal["customer", "internal_user", "key", "team", "proxy"]*: The group associated with the event. Possible values are:
* "customer": The event is related to a specific customer
* "internal_user": The event is related to a specific internal user.
* "key": The event is related to a specific key.
* "team": The event is related to a team.
* "proxy": The event is related to a proxy.


@ -0,0 +1,251 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# 🙋‍♂️ Customers
Track spend, set budgets for your customers.
## Tracking Customer Credit
### 1. Make LLM API call w/ Customer ID
Make a /chat/completions call and pass 'user' - the first call works.
```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-1234' \ # 👈 YOUR PROXY KEY
--data ' {
"model": "azure-gpt-3.5",
"user": "ishaan3", # 👈 CUSTOMER ID
"messages": [
{
"role": "user",
"content": "what time is it"
}
]
}'
```
The customer_id will be upserted into the DB with the new spend.
If the customer_id already exists, spend will be incremented.
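The same tracking works when calling the proxy through the OpenAI SDK; a sketch assuming the proxy from the curl above (`http://0.0.0.0:4000`, key `sk-1234`):
```python
import openai

client = openai.OpenAI(
    base_url="http://0.0.0.0:4000",  # LiteLLM proxy
    api_key="sk-1234",               # 👈 your proxy key
)

response = client.chat.completions.create(
    model="azure-gpt-3.5",
    user="ishaan3",  # 👈 customer id, upserted/tracked by the proxy
    messages=[{"role": "user", "content": "what time is it"}],
)
print(response.choices[0].message.content)
```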
### 2. Get Customer Spend
<Tabs>
<TabItem value="all-up" label="All-up spend">
Call `/customer/info` to get a customer's all-up spend
```bash
curl -X GET 'http://0.0.0.0:4000/customer/info?end_user_id=ishaan3' \ # 👈 CUSTOMER ID
-H 'Authorization: Bearer sk-1234' \ # 👈 YOUR PROXY KEY
```
Expected Response:
```json
{
"user_id": "ishaan3",
"blocked": false,
"alias": null,
"spend": 0.001413,
"allowed_model_region": null,
"default_model": null,
"litellm_budget_table": null
}
```
</TabItem>
<TabItem value="event-webhook" label="Event Webhook">
To update spend in your client-side DB, point the proxy to your webhook.
E.g. if your server is `https://webhook.site` and you're listening on `6ab090e8-c55f-4a23-b075-3209f5c57906`
1. Add webhook url to your proxy environment:
```bash
export WEBHOOK_URL="https://webhook.site/6ab090e8-c55f-4a23-b075-3209f5c57906"
```
2. Add 'webhook' to config.yaml
```yaml
general_settings:
alerting: ["webhook"] # 👈 KEY CHANGE
```
3. Test it!
```bash
curl -X POST 'http://localhost:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "mistral",
"messages": [
{
"role": "user",
"content": "What's the weather like in Boston today?"
}
],
"user": "krrish12"
}
'
```
Expected Response
```json
{
"spend": 0.0011120000000000001, # 👈 SPEND
"max_budget": null,
"token": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
"customer_id": "krrish12", # 👈 CUSTOMER ID
"user_id": null,
"team_id": null,
"user_email": null,
"key_alias": null,
"projected_exceeded_date": null,
"projected_spend": null,
"event": "spend_tracked",
"event_group": "customer",
"event_message": "Customer spend tracked. Customer=krrish12, spend=0.0011120000000000001"
}
```
[See Webhook Spec](./alerting.md#api-spec-for-webhook-event)
</TabItem>
</Tabs>
## Setting Customer Budgets
Set customer budgets (e.g. monthly budgets, tpm/rpm limits) on LiteLLM Proxy
### Quick Start
Create / Update a customer with budget
**Create New Customer w/ budget**
```bash
curl -X POST 'http://0.0.0.0:4000/customer/new' \
-H 'Authorization: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{
"user_id" : "my-customer-id",
"max_budget": "0" # 👈 CAN BE FLOAT
}'
```
**Test it!**
```bash
curl -X POST 'http://localhost:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "mistral",
"messages": [
{
"role": "user",
"content": "What'\''s the weather like in Boston today?"
}
],
"user": "ishaan-jaff-48"
}'
```
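Once the customer created above exceeds its `max_budget`, further requests are rejected. A sketch of handling that from the OpenAI SDK, assuming the proxy surfaces the rejection as a 401 budget-exceeded error (as in the budget error example shown later in these docs):
```python
import openai

client = openai.OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

try:
    client.chat.completions.create(
        model="mistral",
        messages=[{"role": "user", "content": "What's the weather like in Boston today?"}],
        user="my-customer-id",  # 👈 customer created above with max_budget 0
    )
except openai.AuthenticationError as e:
    # the proxy returns a 401 once the customer's budget is exceeded
    print("Budget exceeded:", e)
```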
### Assign Pricing Tiers
Create and assign customers to pricing tiers.
#### 1. Create a budget
<Tabs>
<TabItem value="ui" label="UI">
- Go to the 'Budgets' tab on the UI.
- Click on '+ Create Budget'.
- Create your pricing tier (e.g. 'my-free-tier' with budget $4). This means each user on this pricing tier will have a max budget of $4.
<Image img={require('../../img/create_budget_modal.png')} />
</TabItem>
<TabItem value="api" label="API">
Use the `/budget/new` endpoint for creating a new budget. [API Reference](https://litellm-api.up.railway.app/#/budget%20management/new_budget_budget_new_post)
```bash
curl -X POST 'http://localhost:4000/budget/new' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"budget_id": "my-free-tier",
"max_budget": 4
}'
```
</TabItem>
</Tabs>
#### 2. Assign Budget to Customer
In your application code, assign budget when creating a new customer.
Just use the `budget_id` used when creating the budget. In our example, this is `my-free-tier`.
```bash
curl -X POST 'http://localhost:4000/customer/new' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"user_id": "my-customer-id",
"budget_id": "my-free-tier" # 👈 KEY CHANGE
}'
```
#### 3. Test it!
<Tabs>
<TabItem value="curl" label="curl">
```bash
curl -X POST 'http://localhost:4000/customer/new' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"user_id": "my-customer-id",
"budget_id": "my-free-tier" # 👈 KEY CHANGE
}'
```
</TabItem>
<TabItem value="openai" label="OpenAI">
```python
from openai import OpenAI
client = OpenAI(
base_url="<your_proxy_base_url",
api_key="<your_proxy_key>"
)
completion = client.chat.completions.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"}
],
user="my-customer-id"
)
print(completion.choices[0].message)
```
</TabItem>
</Tabs>


@ -223,7 +223,7 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
Error
```shell
{"error":{"message":"Authentication Error, ExceededBudget: User ishaan3 has exceeded their budget. Current spend: 0.0008869999999999999; Max Budget: 0.0001","type":"auth_error","param":"None","code":401}}%
{"error":{"message":"Budget has been exceeded: User ishaan3 has exceeded their budget. Current spend: 0.0008869999999999999; Max Budget: 0.0001","type":"auth_error","param":"None","code":401}}%
```
</TabItem>

Binary file added (193 KiB image); contents not shown.


@ -41,6 +41,7 @@ const sidebars = {
"proxy/reliability",
"proxy/cost_tracking",
"proxy/users",
"proxy/customers",
"proxy/billing",
"proxy/user_keys",
"proxy/enterprise",


@ -6,7 +6,13 @@ warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*
import threading, requests, os
from typing import Callable, List, Optional, Dict, Union, Any, Literal
from litellm.caching import Cache
from litellm._logging import set_verbose, _turn_on_debug, verbose_logger, json_logs
from litellm._logging import (
set_verbose,
_turn_on_debug,
verbose_logger,
json_logs,
_turn_on_json,
)
from litellm.proxy._types import (
KeyManagementSystem,
KeyManagementSettings,
@ -221,7 +227,7 @@ default_team_settings: Optional[List] = None
max_user_budget: Optional[float] = None
max_end_user_budget: Optional[float] = None
#### RELIABILITY ####
request_timeout: Optional[float] = 6000
request_timeout: float = 6000
num_retries: Optional[int] = None # per model endpoint
default_fallbacks: Optional[List] = None
fallbacks: Optional[List] = None
@ -298,6 +304,7 @@ api_base = None
headers = None
api_version = None
organization = None
project = None
config_path = None
####### COMPLETION MODELS ###################
open_ai_chat_completion_models: List = []
@ -797,3 +804,4 @@ from .budget_manager import BudgetManager
from .proxy.proxy_cli import run_server
from .router import Router
from .assistants.main import *
from .batches.main import *


@ -39,6 +39,16 @@ verbose_proxy_logger.addHandler(handler)
verbose_logger.addHandler(handler)
def _turn_on_json():
handler = logging.StreamHandler()
handler.setLevel(logging.DEBUG)
handler.setFormatter(JsonFormatter())
verbose_router_logger.addHandler(handler)
verbose_proxy_logger.addHandler(handler)
verbose_logger.addHandler(handler)
def _turn_on_debug():
verbose_logger.setLevel(level=logging.DEBUG) # set package log to debug
verbose_router_logger.setLevel(level=logging.DEBUG) # set router logs to debug
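A quick sketch of what the new helper enables; calling the private `_turn_on_json()` directly is illustrative only, it just attaches the `JsonFormatter` handlers shown above to litellm's loggers:
```python
from litellm._logging import _turn_on_json, verbose_logger

_turn_on_json()  # attach JSON-formatting handlers to the litellm loggers
verbose_logger.warning("this record is now emitted as structured JSON")
```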

litellm/batches/main.py (new file, 589 lines added)

@ -0,0 +1,589 @@
"""
Main File for Batches API implementation
https://platform.openai.com/docs/api-reference/batch
- create_batch()
- retrieve_batch()
- cancel_batch()
- list_batch()
"""
import os
import asyncio
from functools import partial
import contextvars
from typing import Literal, Optional, Dict, Coroutine, Any, Union
import httpx
import litellm
from litellm import client
from litellm.utils import supports_httpx_timeout
from ..types.router import *
from ..llms.openai import OpenAIBatchesAPI, OpenAIFilesAPI
from ..types.llms.openai import (
CreateBatchRequest,
RetrieveBatchRequest,
CancelBatchRequest,
CreateFileRequest,
FileTypes,
FileObject,
Batch,
FileContentRequest,
HttpxBinaryResponseContent,
)
####### ENVIRONMENT VARIABLES ###################
openai_batches_instance = OpenAIBatchesAPI()
openai_files_instance = OpenAIFilesAPI()
#################################################
async def acreate_file(
file: FileTypes,
purpose: Literal["assistants", "batch", "fine-tune"],
custom_llm_provider: Literal["openai"] = "openai",
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
) -> Coroutine[Any, Any, FileObject]:
"""
Async: Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.
LiteLLM Equivalent of POST https://api.openai.com/v1/files
"""
try:
loop = asyncio.get_event_loop()
kwargs["acreate_file"] = True
# Use a partial function to pass your keyword arguments
func = partial(
create_file,
file,
purpose,
custom_llm_provider,
extra_headers,
extra_body,
**kwargs,
)
# Add the context to the function
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response # type: ignore
return response
except Exception as e:
raise e
def create_file(
file: FileTypes,
purpose: Literal["assistants", "batch", "fine-tune"],
custom_llm_provider: Literal["openai"] = "openai",
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
) -> Union[FileObject, Coroutine[Any, Any, FileObject]]:
"""
Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.
LiteLLM Equivalent of POST https://api.openai.com/v1/files
"""
try:
optional_params = GenericLiteLLMParams(**kwargs)
if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = (
optional_params.api_base
or litellm.api_base
or os.getenv("OPENAI_API_BASE")
or "https://api.openai.com/v1"
)
organization = (
optional_params.organization
or litellm.organization
or os.getenv("OPENAI_ORGANIZATION", None)
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
)
# set API KEY
api_key = (
optional_params.api_key
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
or litellm.openai_key
or os.getenv("OPENAI_API_KEY")
)
### TIMEOUT LOGIC ###
timeout = (
optional_params.timeout or kwargs.get("request_timeout", 600) or 600
)
# set timeout for 10 minutes by default
if (
timeout is not None
and isinstance(timeout, httpx.Timeout)
and supports_httpx_timeout(custom_llm_provider) == False
):
read_timeout = timeout.read or 600
timeout = read_timeout # default 10 min timeout
elif timeout is not None and not isinstance(timeout, httpx.Timeout):
timeout = float(timeout) # type: ignore
elif timeout is None:
timeout = 600.0
_create_file_request = CreateFileRequest(
file=file,
purpose=purpose,
extra_headers=extra_headers,
extra_body=extra_body,
)
_is_async = kwargs.pop("acreate_file", False) is True
response = openai_files_instance.create_file(
_is_async=_is_async,
api_base=api_base,
api_key=api_key,
timeout=timeout,
max_retries=optional_params.max_retries,
organization=organization,
create_file_data=_create_file_request,
)
else:
raise litellm.exceptions.BadRequestError(
message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format(
custom_llm_provider
),
model="n/a",
llm_provider=custom_llm_provider,
response=httpx.Response(
status_code=400,
content="Unsupported provider",
request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore
),
)
return response
except Exception as e:
raise e
async def afile_content(
file_id: str,
custom_llm_provider: Literal["openai"] = "openai",
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
) -> Coroutine[Any, Any, HttpxBinaryResponseContent]:
"""
Async: Get file contents
LiteLLM Equivalent of GET https://api.openai.com/v1/files/{file_id}/content
"""
try:
loop = asyncio.get_event_loop()
kwargs["afile_content"] = True
# Use a partial function to pass your keyword arguments
func = partial(
file_content,
file_id,
custom_llm_provider,
extra_headers,
extra_body,
**kwargs,
)
# Add the context to the function
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response # type: ignore
return response
except Exception as e:
raise e
def file_content(
file_id: str,
custom_llm_provider: Literal["openai"] = "openai",
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
) -> Union[HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]]:
"""
Returns the contents of the specified file.
LiteLLM Equivalent of GET https://api.openai.com/v1/files/{file_id}/content
"""
try:
optional_params = GenericLiteLLMParams(**kwargs)
if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = (
optional_params.api_base
or litellm.api_base
or os.getenv("OPENAI_API_BASE")
or "https://api.openai.com/v1"
)
organization = (
optional_params.organization
or litellm.organization
or os.getenv("OPENAI_ORGANIZATION", None)
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
)
# set API KEY
api_key = (
optional_params.api_key
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
or litellm.openai_key
or os.getenv("OPENAI_API_KEY")
)
### TIMEOUT LOGIC ###
timeout = (
optional_params.timeout or kwargs.get("request_timeout", 600) or 600
)
# set timeout for 10 minutes by default
if (
timeout is not None
and isinstance(timeout, httpx.Timeout)
and supports_httpx_timeout(custom_llm_provider) == False
):
read_timeout = timeout.read or 600
timeout = read_timeout # default 10 min timeout
elif timeout is not None and not isinstance(timeout, httpx.Timeout):
timeout = float(timeout) # type: ignore
elif timeout is None:
timeout = 600.0
_file_content_request = FileContentRequest(
file_id=file_id,
extra_headers=extra_headers,
extra_body=extra_body,
)
_is_async = kwargs.pop("afile_content", False) is True
response = openai_files_instance.file_content(
_is_async=_is_async,
file_content_request=_file_content_request,
api_base=api_base,
api_key=api_key,
timeout=timeout,
max_retries=optional_params.max_retries,
organization=organization,
)
else:
raise litellm.exceptions.BadRequestError(
message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format(
custom_llm_provider
),
model="n/a",
llm_provider=custom_llm_provider,
response=httpx.Response(
status_code=400,
content="Unsupported provider",
request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore
),
)
return response
except Exception as e:
raise e
async def acreate_batch(
completion_window: Literal["24h"],
endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"],
input_file_id: str,
custom_llm_provider: Literal["openai"] = "openai",
metadata: Optional[Dict[str, str]] = None,
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
) -> Coroutine[Any, Any, Batch]:
"""
Async: Creates and executes a batch from an uploaded file of requests
LiteLLM Equivalent of POST: https://api.openai.com/v1/batches
"""
try:
loop = asyncio.get_event_loop()
kwargs["acreate_batch"] = True
# Use a partial function to pass your keyword arguments
func = partial(
create_batch,
completion_window,
endpoint,
input_file_id,
custom_llm_provider,
metadata,
extra_headers,
extra_body,
**kwargs,
)
# Add the context to the function
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response # type: ignore
return response
except Exception as e:
raise e
def create_batch(
completion_window: Literal["24h"],
endpoint: Literal["/v1/chat/completions", "/v1/embeddings"],
input_file_id: str,
custom_llm_provider: Literal["openai"] = "openai",
metadata: Optional[Dict[str, str]] = None,
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
) -> Union[Batch, Coroutine[Any, Any, Batch]]:
"""
Creates and executes a batch from an uploaded file of requests
LiteLLM Equivalent of POST: https://api.openai.com/v1/batches
"""
try:
optional_params = GenericLiteLLMParams(**kwargs)
if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = (
optional_params.api_base
or litellm.api_base
or os.getenv("OPENAI_API_BASE")
or "https://api.openai.com/v1"
)
organization = (
optional_params.organization
or litellm.organization
or os.getenv("OPENAI_ORGANIZATION", None)
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
)
# set API KEY
api_key = (
optional_params.api_key
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
or litellm.openai_key
or os.getenv("OPENAI_API_KEY")
)
### TIMEOUT LOGIC ###
timeout = (
optional_params.timeout or kwargs.get("request_timeout", 600) or 600
)
# set timeout for 10 minutes by default
if (
timeout is not None
and isinstance(timeout, httpx.Timeout)
and supports_httpx_timeout(custom_llm_provider) == False
):
read_timeout = timeout.read or 600
timeout = read_timeout # default 10 min timeout
elif timeout is not None and not isinstance(timeout, httpx.Timeout):
timeout = float(timeout) # type: ignore
elif timeout is None:
timeout = 600.0
_is_async = kwargs.pop("acreate_batch", False) is True
_create_batch_request = CreateBatchRequest(
completion_window=completion_window,
endpoint=endpoint,
input_file_id=input_file_id,
metadata=metadata,
extra_headers=extra_headers,
extra_body=extra_body,
)
response = openai_batches_instance.create_batch(
api_base=api_base,
api_key=api_key,
organization=organization,
create_batch_data=_create_batch_request,
timeout=timeout,
max_retries=optional_params.max_retries,
_is_async=_is_async,
)
else:
raise litellm.exceptions.BadRequestError(
message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format(
custom_llm_provider
),
model="n/a",
llm_provider=custom_llm_provider,
response=httpx.Response(
status_code=400,
content="Unsupported provider",
request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore
),
)
return response
except Exception as e:
raise e
async def aretrieve_batch(
batch_id: str,
custom_llm_provider: Literal["openai"] = "openai",
metadata: Optional[Dict[str, str]] = None,
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
) -> Coroutine[Any, Any, Batch]:
"""
Async: Retrieves a batch.
LiteLLM Equivalent of GET https://api.openai.com/v1/batches/{batch_id}
"""
try:
loop = asyncio.get_event_loop()
kwargs["aretrieve_batch"] = True
# Use a partial function to pass your keyword arguments
func = partial(
retrieve_batch,
batch_id,
custom_llm_provider,
metadata,
extra_headers,
extra_body,
**kwargs,
)
# Add the context to the function
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response # type: ignore
return response
except Exception as e:
raise e
def retrieve_batch(
batch_id: str,
custom_llm_provider: Literal["openai"] = "openai",
metadata: Optional[Dict[str, str]] = None,
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
) -> Union[Batch, Coroutine[Any, Any, Batch]]:
"""
Retrieves a batch.
LiteLLM Equivalent of GET https://api.openai.com/v1/batches/{batch_id}
"""
try:
optional_params = GenericLiteLLMParams(**kwargs)
if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = (
optional_params.api_base
or litellm.api_base
or os.getenv("OPENAI_API_BASE")
or "https://api.openai.com/v1"
)
organization = (
optional_params.organization
or litellm.organization
or os.getenv("OPENAI_ORGANIZATION", None)
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
)
# set API KEY
api_key = (
optional_params.api_key
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
or litellm.openai_key
or os.getenv("OPENAI_API_KEY")
)
### TIMEOUT LOGIC ###
timeout = (
optional_params.timeout or kwargs.get("request_timeout", 600) or 600
)
# set timeout for 10 minutes by default
if (
timeout is not None
and isinstance(timeout, httpx.Timeout)
and supports_httpx_timeout(custom_llm_provider) == False
):
read_timeout = timeout.read or 600
timeout = read_timeout # default 10 min timeout
elif timeout is not None and not isinstance(timeout, httpx.Timeout):
timeout = float(timeout) # type: ignore
elif timeout is None:
timeout = 600.0
_retrieve_batch_request = RetrieveBatchRequest(
batch_id=batch_id,
extra_headers=extra_headers,
extra_body=extra_body,
)
_is_async = kwargs.pop("aretrieve_batch", False) is True
response = openai_batches_instance.retrieve_batch(
_is_async=_is_async,
retrieve_batch_data=_retrieve_batch_request,
api_base=api_base,
api_key=api_key,
organization=organization,
timeout=timeout,
max_retries=optional_params.max_retries,
)
else:
raise litellm.exceptions.BadRequestError(
message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format(
custom_llm_provider
),
model="n/a",
llm_provider=custom_llm_provider,
response=httpx.Response(
status_code=400,
content="Unsupported provider",
request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore
),
)
return response
except Exception as e:
raise e
def cancel_batch():
pass
def list_batch():
pass
async def acancel_batch():
pass
async def alist_batch():
pass
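A usage sketch for the new module, based on the signatures above; the `.jsonl` filename is illustrative and an `OPENAI_API_KEY` is assumed to be set in the environment:
```python
import asyncio
import litellm


async def main():
    # 1. upload a .jsonl file of requests for the Batch API
    file_obj = await litellm.acreate_file(
        file=open("batch_requests.jsonl", "rb"),
        purpose="batch",
        custom_llm_provider="openai",
    )

    # 2. create a batch against the uploaded file
    batch = await litellm.acreate_batch(
        completion_window="24h",
        endpoint="/v1/chat/completions",
        input_file_id=file_obj.id,
        custom_llm_provider="openai",
    )

    # 3. retrieve the batch to check its processing status
    retrieved = await litellm.aretrieve_batch(
        batch_id=batch.id, custom_llm_provider="openai"
    )
    print(retrieved.status)


asyncio.run(main())
```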


@ -314,6 +314,7 @@ class BudgetExceededError(Exception):
self.current_cost = current_cost
self.max_budget = max_budget
message = f"Budget has been exceeded! Current cost: {current_cost}, Max budget: {max_budget}"
self.message = message
super().__init__(message)


@ -455,8 +455,13 @@ class LangFuseLogger:
}
generation_name = clean_metadata.pop("generation_name", None)
if generation_name is None:
# just log `litellm-{call_type}` as the generation name
# if `generation_name` is None, use sensible default values
# If using litellm proxy, use `key_alias` if it is not None
# If `key_alias` is None, just log `litellm-{call_type}` as the generation name
_user_api_key_alias = clean_metadata.get("user_api_key_alias", None)
generation_name = f"litellm-{kwargs.get('call_type', 'completion')}"
if _user_api_key_alias is not None:
generation_name = f"litellm:{_user_api_key_alias}"
if response_obj is not None and "system_fingerprint" in response_obj:
system_fingerprint = response_obj.get("system_fingerprint", None)


@ -41,6 +41,7 @@ class ProviderRegionOutageModel(BaseOutageModel):
# we use this for the email header, please send a test email if you change this. verify it looks good on email
LITELLM_LOGO_URL = "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
LITELLM_SUPPORT_CONTACT = "support@berri.ai"
class LiteLLMBase(BaseModel):
@ -683,14 +684,16 @@ class SlackAlerting(CustomLogger):
event: Optional[
Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"]
] = None
event_group: Optional[Literal["user", "team", "key", "proxy"]] = None
event_group: Optional[
Literal["internal_user", "team", "key", "proxy", "customer"]
] = None
event_message: str = ""
webhook_event: Optional[WebhookEvent] = None
if type == "proxy_budget":
event_group = "proxy"
event_message += "Proxy Budget: "
elif type == "user_budget":
event_group = "user"
event_group = "internal_user"
event_message += "User Budget: "
_id = user_info.user_id or _id
elif type == "team_budget":
@ -754,6 +757,36 @@ class SlackAlerting(CustomLogger):
return
return
async def customer_spend_alert(
self,
token: Optional[str],
key_alias: Optional[str],
end_user_id: Optional[str],
response_cost: Optional[float],
max_budget: Optional[float],
):
if end_user_id is not None and token is not None and response_cost is not None:
# log customer spend
event = WebhookEvent(
spend=response_cost,
max_budget=max_budget,
token=token,
customer_id=end_user_id,
user_id=None,
team_id=None,
user_email=None,
key_alias=key_alias,
projected_exceeded_date=None,
projected_spend=None,
event="spend_tracked",
event_group="customer",
event_message="Customer spend tracked. Customer={}, spend={}".format(
end_user_id, response_cost
),
)
await self.send_webhook_alert(webhook_event=event)
def _count_outage_alerts(self, alerts: List[int]) -> str:
"""
Parameters:
@ -1171,6 +1204,10 @@ Model Info:
await self._check_if_using_premium_email_feature(
premium_user, email_logo_url, email_support_contact
)
if email_logo_url is None:
email_logo_url = LITELLM_LOGO_URL
if email_support_contact is None:
email_support_contact = LITELLM_SUPPORT_CONTACT
event_name = webhook_event.event_message
recipient_email = webhook_event.user_email
@ -1271,6 +1308,11 @@ Model Info:
premium_user, email_logo_url, email_support_contact
)
if email_logo_url is None:
email_logo_url = LITELLM_LOGO_URL
if email_support_contact is None:
email_support_contact = LITELLM_SUPPORT_CONTACT
event_name = webhook_event.event_message
recipient_email = webhook_event.user_email
user_name = webhook_event.user_id
@ -1401,7 +1443,9 @@ Model Info:
if response.status_code == 200:
pass
else:
print("Error sending slack alert. Error=", response.text) # noqa
verbose_proxy_logger.debug(
"Error sending slack alert. Error=", response.text
)
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
"""Log deployment latency"""
@ -1421,6 +1465,8 @@ Model Info:
final_value = float(
response_s.total_seconds() / completion_tokens
)
if isinstance(final_value, timedelta):
final_value = final_value.total_seconds()
await self.async_update_daily_reports(
DeploymentMetrics(


@ -1,114 +1,153 @@
import traceback
from litellm._logging import verbose_logger
import litellm
class TraceloopLogger:
def __init__(self):
from traceloop.sdk.tracing.tracing import TracerWrapper
from traceloop.sdk import Traceloop
try:
from traceloop.sdk.tracing.tracing import TracerWrapper
from traceloop.sdk import Traceloop
from traceloop.sdk.instruments import Instruments
except ModuleNotFoundError as e:
verbose_logger.error(
f"Traceloop not installed, try running 'pip install traceloop-sdk' to fix this error: {e}\n{traceback.format_exc()}"
)
Traceloop.init(app_name="Litellm-Server", disable_batch=True)
Traceloop.init(
app_name="Litellm-Server",
disable_batch=True,
instruments=[
Instruments.CHROMA,
Instruments.PINECONE,
Instruments.WEAVIATE,
Instruments.LLAMA_INDEX,
Instruments.LANGCHAIN,
],
)
self.tracer_wrapper = TracerWrapper()
def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
from opentelemetry.trace import SpanKind
def log_event(
self,
kwargs,
response_obj,
start_time,
end_time,
user_id,
print_verbose,
level="DEFAULT",
status_message=None,
):
from opentelemetry import trace
from opentelemetry.trace import SpanKind, Status, StatusCode
from opentelemetry.semconv.ai import SpanAttributes
try:
print_verbose(
f"Traceloop Logging - Enters logging function for model {kwargs}"
)
tracer = self.tracer_wrapper.get_tracer()
model = kwargs.get("model")
# LiteLLM uses the standard OpenAI library, so it's already handled by Traceloop SDK
if kwargs.get("litellm_params").get("custom_llm_provider") == "openai":
return
optional_params = kwargs.get("optional_params", {})
with tracer.start_as_current_span(
"litellm.completion",
kind=SpanKind.CLIENT,
) as span:
if span.is_recording():
span = tracer.start_span(
"litellm.completion", kind=SpanKind.CLIENT, start_time=start_time
)
if span.is_recording():
span.set_attribute(
SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model")
)
if "stop" in optional_params:
span.set_attribute(
SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model")
SpanAttributes.LLM_CHAT_STOP_SEQUENCES,
optional_params.get("stop"),
)
if "stop" in optional_params:
span.set_attribute(
SpanAttributes.LLM_CHAT_STOP_SEQUENCES,
optional_params.get("stop"),
)
if "frequency_penalty" in optional_params:
span.set_attribute(
SpanAttributes.LLM_FREQUENCY_PENALTY,
optional_params.get("frequency_penalty"),
)
if "presence_penalty" in optional_params:
span.set_attribute(
SpanAttributes.LLM_PRESENCE_PENALTY,
optional_params.get("presence_penalty"),
)
if "top_p" in optional_params:
span.set_attribute(
SpanAttributes.LLM_TOP_P, optional_params.get("top_p")
)
if "tools" in optional_params or "functions" in optional_params:
span.set_attribute(
SpanAttributes.LLM_REQUEST_FUNCTIONS,
optional_params.get(
"tools", optional_params.get("functions")
),
)
if "user" in optional_params:
span.set_attribute(
SpanAttributes.LLM_USER, optional_params.get("user")
)
if "max_tokens" in optional_params:
span.set_attribute(
SpanAttributes.LLM_REQUEST_MAX_TOKENS,
kwargs.get("max_tokens"),
)
if "temperature" in optional_params:
span.set_attribute(
SpanAttributes.LLM_TEMPERATURE, kwargs.get("temperature")
)
for idx, prompt in enumerate(kwargs.get("messages")):
span.set_attribute(
f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
prompt.get("role"),
)
span.set_attribute(
f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
prompt.get("content"),
)
if "frequency_penalty" in optional_params:
span.set_attribute(
SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model")
SpanAttributes.LLM_FREQUENCY_PENALTY,
optional_params.get("frequency_penalty"),
)
if "presence_penalty" in optional_params:
span.set_attribute(
SpanAttributes.LLM_PRESENCE_PENALTY,
optional_params.get("presence_penalty"),
)
if "top_p" in optional_params:
span.set_attribute(
SpanAttributes.LLM_TOP_P, optional_params.get("top_p")
)
if "tools" in optional_params or "functions" in optional_params:
span.set_attribute(
SpanAttributes.LLM_REQUEST_FUNCTIONS,
optional_params.get("tools", optional_params.get("functions")),
)
if "user" in optional_params:
span.set_attribute(
SpanAttributes.LLM_USER, optional_params.get("user")
)
if "max_tokens" in optional_params:
span.set_attribute(
SpanAttributes.LLM_REQUEST_MAX_TOKENS,
kwargs.get("max_tokens"),
)
if "temperature" in optional_params:
span.set_attribute(
SpanAttributes.LLM_REQUEST_TEMPERATURE,
kwargs.get("temperature"),
)
usage = response_obj.get("usage")
if usage:
span.set_attribute(
SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
usage.get("total_tokens"),
)
span.set_attribute(
SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
usage.get("completion_tokens"),
)
span.set_attribute(
SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
usage.get("prompt_tokens"),
)
for idx, choice in enumerate(response_obj.get("choices")):
span.set_attribute(
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
choice.get("finish_reason"),
)
span.set_attribute(
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
choice.get("message").get("role"),
)
span.set_attribute(
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
choice.get("message").get("content"),
)
for idx, prompt in enumerate(kwargs.get("messages")):
span.set_attribute(
f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
prompt.get("role"),
)
span.set_attribute(
f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
prompt.get("content"),
)
span.set_attribute(
SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model")
)
usage = response_obj.get("usage")
if usage:
span.set_attribute(
SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
usage.get("total_tokens"),
)
span.set_attribute(
SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
usage.get("completion_tokens"),
)
span.set_attribute(
SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
usage.get("prompt_tokens"),
)
for idx, choice in enumerate(response_obj.get("choices")):
span.set_attribute(
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
choice.get("finish_reason"),
)
span.set_attribute(
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
choice.get("message").get("role"),
)
span.set_attribute(
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
choice.get("message").get("content"),
)
if (
level == "ERROR"
and status_message is not None
and isinstance(status_message, str)
):
span.record_exception(Exception(status_message))
span.set_status(Status(StatusCode.ERROR, status_message))
span.end(end_time)
except Exception as e:
print_verbose(f"Traceloop Layer Error - {e}")


@ -379,13 +379,12 @@ class AnthropicChatCompletion(BaseLLM):
logger_fn=None,
headers={},
):
self.async_handler = AsyncHTTPHandler(
timeout=httpx.Timeout(timeout=600.0, connect=5.0)
async_handler = AsyncHTTPHandler(
timeout=httpx.Timeout(timeout=600.0, connect=20.0)
)
data["stream"] = True
response = await self.async_handler.post(
api_base, headers=headers, data=json.dumps(data), stream=True
)
response = await async_handler.post(api_base, headers=headers, json=data)
if response.status_code != 200:
raise AnthropicError(
@ -421,12 +420,10 @@ class AnthropicChatCompletion(BaseLLM):
logger_fn=None,
headers={},
) -> Union[ModelResponse, CustomStreamWrapper]:
self.async_handler = AsyncHTTPHandler(
async_handler = AsyncHTTPHandler(
timeout=httpx.Timeout(timeout=600.0, connect=5.0)
)
response = await self.async_handler.post(
api_base, headers=headers, data=json.dumps(data)
)
response = await async_handler.post(api_base, headers=headers, json=data)
if stream and _is_function_call:
return self.process_streaming_response(
model=model,


@ -43,12 +43,13 @@ class AsyncHTTPHandler:
self,
url: str,
data: Optional[Union[dict, str]] = None, # type: ignore
json: Optional[dict] = None,
params: Optional[dict] = None,
headers: Optional[dict] = None,
stream: bool = False,
):
req = self.client.build_request(
"POST", url, data=data, params=params, headers=headers # type: ignore
"POST", url, data=data, json=json, params=params, headers=headers # type: ignore
)
response = await self.client.send(req, stream=stream)
return response
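A sketch of the new `json=` parameter, assuming the handler lives at `litellm.llms.custom_httpx.http_handler` as in the upstream repo; the URL is illustrative:
```python
import asyncio

import httpx
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler


async def main():
    handler = AsyncHTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=20.0))
    # pass a dict directly instead of data=json.dumps(...)
    resp = await handler.post(
        "https://httpbin.org/post",
        json={"hello": "world"},
        headers={"Content-Type": "application/json"},
    )
    print(resp.status_code)


asyncio.run(main())
```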


@ -45,6 +45,8 @@ class OllamaConfig:
- `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7
- `seed` (int): Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. Example usage: seed 42
- `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"
- `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1
@ -69,6 +71,7 @@ class OllamaConfig:
repeat_last_n: Optional[int] = None
repeat_penalty: Optional[float] = None
temperature: Optional[float] = None
seed: Optional[int] = None
stop: Optional[list] = (
None # stop is a list based on this - https://github.com/ollama/ollama/pull/442
)
@ -90,6 +93,7 @@ class OllamaConfig:
repeat_last_n: Optional[int] = None,
repeat_penalty: Optional[float] = None,
temperature: Optional[float] = None,
seed: Optional[int] = None,
stop: Optional[list] = None,
tfs_z: Optional[float] = None,
num_predict: Optional[int] = None,
@ -120,6 +124,44 @@ class OllamaConfig:
)
and v is not None
}
def get_supported_openai_params(
self,
):
return [
"max_tokens",
"stream",
"top_p",
"temperature",
"seed",
"frequency_penalty",
"stop",
"response_format",
]
# ollama wants plain base64 jpeg/png files as images. strip any leading dataURI
# and convert to jpeg if necessary.
def _convert_image(image):
import base64, io
try:
from PIL import Image
except:
raise Exception(
"ollama image conversion failed please run `pip install Pillow`"
)
orig = image
if image.startswith("data:"):
image = image.split(",")[-1]
try:
image_data = Image.open(io.BytesIO(base64.b64decode(image)))
if image_data.format in ["JPEG", "PNG"]:
return image
except:
return orig
jpeg_image = io.BytesIO()
image_data.convert("RGB").save(jpeg_image, "JPEG")
jpeg_image.seek(0)
return base64.b64encode(jpeg_image.getvalue()).decode("utf-8")
# ollama implementation
@ -158,7 +200,7 @@ def get_ollama_response(
if format is not None:
data["format"] = format
if images is not None:
data["images"] = images
data["images"] = [_convert_image(image) for image in images]
## LOGGING
logging_obj.pre_call(


@ -45,6 +45,8 @@ class OllamaChatConfig:
- `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7
- `seed` (int): Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. Example usage: seed 42
- `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"
- `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1
@ -69,6 +71,7 @@ class OllamaChatConfig:
repeat_last_n: Optional[int] = None
repeat_penalty: Optional[float] = None
temperature: Optional[float] = None
seed: Optional[int] = None
stop: Optional[list] = (
None # stop is a list based on this - https://github.com/ollama/ollama/pull/442
)
@ -90,6 +93,7 @@ class OllamaChatConfig:
repeat_last_n: Optional[int] = None,
repeat_penalty: Optional[float] = None,
temperature: Optional[float] = None,
seed: Optional[int] = None,
stop: Optional[list] = None,
tfs_z: Optional[float] = None,
num_predict: Optional[int] = None,
@ -130,6 +134,7 @@ class OllamaChatConfig:
"stream",
"top_p",
"temperature",
"seed",
"frequency_penalty",
"stop",
"tools",
@ -146,6 +151,8 @@ class OllamaChatConfig:
optional_params["stream"] = value
if param == "temperature":
optional_params["temperature"] = value
if param == "seed":
optional_params["seed"] = value
if param == "top_p":
optional_params["top_p"] = value
if param == "frequency_penalty":


@ -21,11 +21,12 @@ from litellm.utils import (
TranscriptionResponse,
TextCompletionResponse,
)
from typing import Callable, Optional
from typing import Callable, Optional, Coroutine
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
from openai import OpenAI, AsyncOpenAI
from ..types.llms.openai import *
import openai
class OpenAIError(Exception):
@ -349,7 +350,6 @@ class OpenAIConfig:
"top_p",
"tools",
"tool_choice",
"user",
"function_call",
"functions",
"max_retries",
@ -362,6 +362,12 @@ class OpenAIConfig:
): # gpt-4 does not support 'response_format'
model_specific_params.append("response_format")
if (
model in litellm.open_ai_chat_completion_models
) or model in litellm.open_ai_text_completion_models:
model_specific_params.append(
"user"
) # user is not a param supported by all openai-compatible endpoints - e.g. azure ai
return base_params + model_specific_params
def map_openai_params(
@ -1085,8 +1091,8 @@ class OpenAIChatCompletion(BaseLLM):
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
api_key: Optional[str],
api_base: Optional[str],
client=None,
logging_obj=None,
atranscription: bool = False,
@ -1142,7 +1148,6 @@ class OpenAIChatCompletion(BaseLLM):
max_retries=None,
logging_obj=None,
):
response = None
try:
if client is None:
openai_aclient = AsyncOpenAI(
@ -1176,6 +1181,95 @@ class OpenAIChatCompletion(BaseLLM):
)
raise e
def audio_speech(
self,
model: str,
input: str,
voice: str,
optional_params: dict,
api_key: Optional[str],
api_base: Optional[str],
organization: Optional[str],
project: Optional[str],
max_retries: int,
timeout: Union[float, httpx.Timeout],
aspeech: Optional[bool] = None,
client=None,
) -> HttpxBinaryResponseContent:
if aspeech is not None and aspeech == True:
return self.async_audio_speech(
model=model,
input=input,
voice=voice,
optional_params=optional_params,
api_key=api_key,
api_base=api_base,
organization=organization,
project=project,
max_retries=max_retries,
timeout=timeout,
client=client,
) # type: ignore
if client is None:
openai_client = OpenAI(
api_key=api_key,
base_url=api_base,
organization=organization,
project=project,
http_client=litellm.client_session,
timeout=timeout,
max_retries=max_retries,
)
else:
openai_client = client
response = openai_client.audio.speech.create(
model=model,
voice=voice, # type: ignore
input=input,
**optional_params,
)
return response
async def async_audio_speech(
self,
model: str,
input: str,
voice: str,
optional_params: dict,
api_key: Optional[str],
api_base: Optional[str],
organization: Optional[str],
project: Optional[str],
max_retries: int,
timeout: Union[float, httpx.Timeout],
client=None,
) -> HttpxBinaryResponseContent:
if client is None:
openai_client = AsyncOpenAI(
api_key=api_key,
base_url=api_base,
organization=organization,
project=project,
http_client=litellm.aclient_session,
timeout=timeout,
max_retries=max_retries,
)
else:
openai_client = client
response = await openai_client.audio.speech.create(
model=model,
voice=voice, # type: ignore
input=input,
**optional_params,
)
return response
async def ahealth_check(
self,
model: Optional[str],
@ -1497,6 +1591,322 @@ class OpenAITextCompletion(BaseLLM):
yield transformed_chunk
class OpenAIFilesAPI(BaseLLM):
"""
OpenAI methods to support files for batches
- create_file()
- retrieve_file()
- list_files()
- delete_file()
- file_content()
- update_file()
"""
def __init__(self) -> None:
super().__init__()
def get_openai_client(
self,
api_key: Optional[str],
api_base: Optional[str],
timeout: Union[float, httpx.Timeout],
max_retries: Optional[int],
organization: Optional[str],
client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
_is_async: bool = False,
) -> Optional[Union[OpenAI, AsyncOpenAI]]:
received_args = locals()
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = None
if client is None:
data = {}
for k, v in received_args.items():
if k == "self" or k == "client" or k == "_is_async":
pass
elif k == "api_base" and v is not None:
data["base_url"] = v
elif v is not None:
data[k] = v
if _is_async is True:
openai_client = AsyncOpenAI(**data)
else:
openai_client = OpenAI(**data) # type: ignore
else:
openai_client = client
return openai_client
async def acreate_file(
self,
create_file_data: CreateFileRequest,
openai_client: AsyncOpenAI,
) -> FileObject:
response = await openai_client.files.create(**create_file_data)
return response
def create_file(
self,
_is_async: bool,
create_file_data: CreateFileRequest,
api_base: str,
api_key: Optional[str],
timeout: Union[float, httpx.Timeout],
max_retries: Optional[int],
organization: Optional[str],
client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
) -> Union[FileObject, Coroutine[Any, Any, FileObject]]:
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
client=client,
_is_async=_is_async,
)
if openai_client is None:
raise ValueError(
"OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
)
if _is_async is True:
if not isinstance(openai_client, AsyncOpenAI):
raise ValueError(
"OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
)
return self.acreate_file( # type: ignore
create_file_data=create_file_data, openai_client=openai_client
)
response = openai_client.files.create(**create_file_data)
return response
async def afile_content(
self,
file_content_request: FileContentRequest,
openai_client: AsyncOpenAI,
) -> HttpxBinaryResponseContent:
response = await openai_client.files.content(**file_content_request)
return response
def file_content(
self,
_is_async: bool,
file_content_request: FileContentRequest,
api_base: str,
api_key: Optional[str],
timeout: Union[float, httpx.Timeout],
max_retries: Optional[int],
organization: Optional[str],
client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
) -> Union[
HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]
]:
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
client=client,
_is_async=_is_async,
)
if openai_client is None:
raise ValueError(
"OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
)
if _is_async is True:
if not isinstance(openai_client, AsyncOpenAI):
raise ValueError(
"OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
)
return self.afile_content( # type: ignore
file_content_request=file_content_request,
openai_client=openai_client,
)
response = openai_client.files.content(**file_content_request)
return response
class OpenAIBatchesAPI(BaseLLM):
"""
OpenAI methods to support batches
- create_batch()
- retrieve_batch()
- cancel_batch()
- list_batch()
"""
def __init__(self) -> None:
super().__init__()
def get_openai_client(
self,
api_key: Optional[str],
api_base: Optional[str],
timeout: Union[float, httpx.Timeout],
max_retries: Optional[int],
organization: Optional[str],
client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
_is_async: bool = False,
) -> Optional[Union[OpenAI, AsyncOpenAI]]:
received_args = locals()
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = None
if client is None:
data = {}
for k, v in received_args.items():
if k == "self" or k == "client" or k == "_is_async":
pass
elif k == "api_base" and v is not None:
data["base_url"] = v
elif v is not None:
data[k] = v
if _is_async is True:
openai_client = AsyncOpenAI(**data)
else:
openai_client = OpenAI(**data) # type: ignore
else:
openai_client = client
return openai_client
async def acreate_batch(
self,
create_batch_data: CreateBatchRequest,
openai_client: AsyncOpenAI,
) -> Batch:
response = await openai_client.batches.create(**create_batch_data)
return response
def create_batch(
self,
_is_async: bool,
create_batch_data: CreateBatchRequest,
api_key: Optional[str],
api_base: Optional[str],
timeout: Union[float, httpx.Timeout],
max_retries: Optional[int],
organization: Optional[str],
client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
) -> Union[Batch, Coroutine[Any, Any, Batch]]:
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
client=client,
_is_async=_is_async,
)
if openai_client is None:
raise ValueError(
"OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
)
if _is_async is True:
if not isinstance(openai_client, AsyncOpenAI):
raise ValueError(
"OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
)
return self.acreate_batch( # type: ignore
create_batch_data=create_batch_data, openai_client=openai_client
)
response = openai_client.batches.create(**create_batch_data)
return response
async def aretrieve_batch(
self,
retrieve_batch_data: RetrieveBatchRequest,
openai_client: AsyncOpenAI,
) -> Batch:
response = await openai_client.batches.retrieve(**retrieve_batch_data)
return response
def retrieve_batch(
self,
_is_async: bool,
retrieve_batch_data: RetrieveBatchRequest,
api_key: Optional[str],
api_base: Optional[str],
timeout: Union[float, httpx.Timeout],
max_retries: Optional[int],
organization: Optional[str],
client: Optional[OpenAI] = None,
):
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
client=client,
_is_async=_is_async,
)
if openai_client is None:
raise ValueError(
"OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
)
if _is_async is True:
if not isinstance(openai_client, AsyncOpenAI):
raise ValueError(
"OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
)
return self.aretrieve_batch( # type: ignore
retrieve_batch_data=retrieve_batch_data, openai_client=openai_client
)
response = openai_client.batches.retrieve(**retrieve_batch_data)
return response
def cancel_batch(
self,
_is_async: bool,
cancel_batch_data: CancelBatchRequest,
api_key: Optional[str],
api_base: Optional[str],
timeout: Union[float, httpx.Timeout],
max_retries: Optional[int],
organization: Optional[str],
client: Optional[OpenAI] = None,
):
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
client=client,
_is_async=_is_async,
)
if openai_client is None:
raise ValueError(
"OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
)
response = openai_client.batches.cancel(**cancel_batch_data)
return response
# def list_batch(
# self,
# list_batch_data: ListBatchRequest,
# api_key: Optional[str],
# api_base: Optional[str],
# timeout: Union[float, httpx.Timeout],
# max_retries: Optional[int],
# organization: Optional[str],
# client: Optional[OpenAI] = None,
# ):
# openai_client: OpenAI = self.get_openai_client(
# api_key=api_key,
# api_base=api_base,
# timeout=timeout,
# max_retries=max_retries,
# organization=organization,
# client=client,
# )
# response = openai_client.batches.list(**list_batch_data)
# return response
class OpenAIAssistantsAPI(BaseLLM):
def __init__(self) -> None:
super().__init__()

View file

@ -14,7 +14,6 @@ from functools import partial
import dotenv, traceback, random, asyncio, time, contextvars
from copy import deepcopy
import httpx
import litellm
from ._logging import verbose_logger
from litellm import ( # type: ignore
@ -92,6 +91,7 @@ import tiktoken
from concurrent.futures import ThreadPoolExecutor
from typing import Callable, List, Optional, Dict, Union, Mapping
from .caching import enable_cache, disable_cache, update_cache
from .types.llms.openai import HttpxBinaryResponseContent
encoding = tiktoken.get_encoding("cl100k_base")
from litellm.utils import (
@ -680,6 +680,7 @@ def completion(
"region_name",
"allowed_model_region",
"model_config",
"fastest_response",
]
default_params = openai_params + litellm_params
@ -4130,6 +4131,24 @@ def transcription(
max_retries=max_retries,
)
elif custom_llm_provider == "openai":
api_base = (
api_base
or litellm.api_base
or get_secret("OPENAI_API_BASE")
or "https://api.openai.com/v1"
) # type: ignore
openai.organization = (
litellm.organization
or get_secret("OPENAI_ORGANIZATION")
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
)
# set API KEY
api_key = (
api_key
or litellm.api_key
or litellm.openai_key
or get_secret("OPENAI_API_KEY")
) # type: ignore
response = openai_chat_completions.audio_transcriptions(
model=model,
audio_file=file,
@ -4139,6 +4158,139 @@ def transcription(
timeout=timeout,
logging_obj=litellm_logging_obj,
max_retries=max_retries,
api_base=api_base,
api_key=api_key,
)
return response
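A minimal sketch of driving this transcription path from the SDK, assuming `OPENAI_API_KEY` is set; `audio.mp3` is a placeholder file:

```python
import litellm

# Hypothetical usage sketch; "audio.mp3" is a placeholder path and OPENAI_API_KEY is read from the env.
with open("audio.mp3", "rb") as audio_file:
    transcript = litellm.transcription(
        model="whisper-1",  # resolved to the OpenAI provider
        file=audio_file,
    )
print(transcript.text)
```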
@client
async def aspeech(*args, **kwargs) -> HttpxBinaryResponseContent:
"""
Calls OpenAI TTS endpoints.
"""
loop = asyncio.get_event_loop()
model = args[0] if len(args) > 0 else kwargs["model"]
### PASS ARGS TO SPEECH ###
kwargs["aspeech"] = True
custom_llm_provider = kwargs.get("custom_llm_provider", None)
try:
# Use a partial function to pass your keyword arguments
func = partial(speech, *args, **kwargs)
# Add the context to the function
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
_, custom_llm_provider, _, _ = get_llm_provider(
model=model, api_base=kwargs.get("api_base", None)
)
# Await normally
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
# Call the synchronous function using run_in_executor
response = await loop.run_in_executor(None, func_with_context)
return response # type: ignore
except Exception as e:
custom_llm_provider = custom_llm_provider or "openai"
raise exception_type(
model=model,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=args,
extra_kwargs=kwargs,
)
@client
def speech(
model: str,
input: str,
voice: str,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
organization: Optional[str] = None,
project: Optional[str] = None,
max_retries: Optional[int] = None,
metadata: Optional[dict] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
response_format: Optional[str] = None,
speed: Optional[int] = None,
client=None,
headers: Optional[dict] = None,
custom_llm_provider: Optional[str] = None,
aspeech: Optional[bool] = None,
**kwargs,
) -> HttpxBinaryResponseContent:
model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base) # type: ignore
optional_params = {}
if response_format is not None:
optional_params["response_format"] = response_format
if speed is not None:
optional_params["speed"] = speed # type: ignore
if timeout is None:
timeout = litellm.request_timeout
if max_retries is None:
max_retries = litellm.num_retries or openai.DEFAULT_MAX_RETRIES
response: Optional[HttpxBinaryResponseContent] = None
if custom_llm_provider == "openai":
api_base = (
api_base # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
or litellm.api_base
or get_secret("OPENAI_API_BASE")
or "https://api.openai.com/v1"
) # type: ignore
# set API KEY
api_key = (
api_key
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
or litellm.openai_key
or get_secret("OPENAI_API_KEY")
) # type: ignore
organization = (
organization
or litellm.organization
or get_secret("OPENAI_ORGANIZATION")
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
) # type: ignore
project = (
project
or litellm.project
or get_secret("OPENAI_PROJECT")
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
) # type: ignore
headers = headers or litellm.headers
response = openai_chat_completions.audio_speech(
model=model,
input=input,
voice=voice,
optional_params=optional_params,
api_key=api_key,
api_base=api_base,
organization=organization,
project=project,
max_retries=max_retries,
timeout=timeout,
client=client, # pass AsyncOpenAI, OpenAI client
aspeech=aspeech,
)
if response is None:
raise Exception(
"Unable to map the custom llm provider={} to a known provider={}.".format(
custom_llm_provider, litellm.provider_list
)
)
return response
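A minimal usage sketch for the new `speech()` entrypoint, assuming `OPENAI_API_KEY` is set; `speech.mp3` is just an example output path:

```python
import litellm

# Minimal sketch of the sync entrypoint defined above (placeholder output path).
response = litellm.speech(
    model="openai/tts-1",
    voice="alloy",
    input="the quick brown fox jumped over the lazy dogs",
)
response.stream_to_file("speech.mp3")  # HttpxBinaryResponseContent supports writing to disk
```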

View file

@ -1265,8 +1265,8 @@
"max_tokens": 4096,
"max_input_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.0000075,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000075,
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[39712,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","608","static/chunks/608-d128caa3cfe973c1.js","931","static/chunks/app/page-76d278f96a0e9768.js"],""]
3:I[45014,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","359","static/chunks/359-f105a7fb61fe8110.js","440","static/chunks/440-b9a05f116e1a696d.js","134","static/chunks/134-4a7b43f992182f2c.js","931","static/chunks/app/page-f610596e5fb3cce4.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["D_ZUmMtLMPSa4aQQUJtKt",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5d93d4a9fa59d72f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["PcGFjo5-03lHREJ3E0k6y",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/33354d8285fe572e.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,7 @@
2:I[77831,[],""]
3:I[87494,["359","static/chunks/359-f105a7fb61fe8110.js","134","static/chunks/134-4a7b43f992182f2c.js","418","static/chunks/app/model_hub/page-aa3c10cf9bb31255.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["PcGFjo5-03lHREJ3E0k6y",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/33354d8285fe572e.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -1,42 +1,16 @@
general_settings:
alert_to_webhook_url:
budget_alerts: https://hooks.slack.com/services/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
daily_reports: https://hooks.slack.com/services/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
db_exceptions: https://hooks.slack.com/services/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
llm_exceptions: https://hooks.slack.com/services/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
llm_requests_hanging: https://hooks.slack.com/services/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
llm_too_slow: https://hooks.slack.com/services/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
outage_alerts: https://hooks.slack.com/services/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
alert_types:
- llm_exceptions
- llm_too_slow
- llm_requests_hanging
- budget_alerts
- db_exceptions
- daily_reports
- spend_reports
- cooldown_deployment
- new_model_added
- outage_alerts
alerting:
- slack
database_connection_pool_limit: 100
database_connection_timeout: 60
health_check_interval: 300
ui_access_mode: all
# litellm_settings:
# json_logs: true
model_list:
- litellm_params:
api_base: http://0.0.0.0:8080
api_key: ''
model: openai/my-fake-model
rpm: 800
model_name: gpt-3.5-turbo-fake-model
- litellm_params:
api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
api_key: os.environ/AZURE_EUROPE_API_KEY
model: azure/gpt-35-turbo
model_name: gpt-3.5-turbo
rpm: 10
model_name: gpt-3.5-turbo-fake-model
- litellm_params:
api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
api_key: os.environ/AZURE_API_KEY
@ -52,5 +26,8 @@ model_list:
api_version: '2023-05-15'
model: azure/chatgpt-v-2
model_name: gpt-3.5-turbo
- model_name: tts
litellm_params:
model: openai/tts-1
router_settings:
enable_pre_call_checks: true
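With the `tts` entry above, a client can hit the proxy's OpenAI-compatible audio route; a sketch assuming a local proxy at a placeholder address and key:

```python
import openai

# Hypothetical client sketch against a locally running proxy (placeholder URL and key),
# assuming the proxy exposes the OpenAI-compatible /audio/speech route for this model.
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.audio.speech.create(
    model="tts",  # the model_name defined in the config above
    voice="alloy",
    input="a short test sentence",
)
response.stream_to_file("speech.mp3")
```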

View file

@ -1,4 +1,4 @@
from pydantic import BaseModel, Extra, Field, root_validator, Json, validator
from pydantic import BaseModel, Extra, Field, model_validator, Json, ConfigDict
from dataclasses import fields
import enum
from typing import Optional, List, Union, Dict, Literal, Any
@ -7,6 +7,75 @@ import uuid, json, sys, os
from litellm.types.router import UpdateRouterConfig
from litellm.types.utils import ProviderField
class LitellmUserRoles(str, enum.Enum):
"""
Admin Roles:
PROXY_ADMIN: admin over the platform
PROXY_ADMIN_VIEW_ONLY: can login, view all own keys, view all spend
Internal User Roles:
INTERNAL_USER: can login, view/create/delete their own keys, view their spend
INTERNAL_USER_VIEW_ONLY: can login, view their own keys, view their own spend
Team Roles:
TEAM: used for JWT auth
Customer Roles:
CUSTOMER: External users -> these are customers
"""
# Admin Roles
PROXY_ADMIN = "proxy_admin"
PROXY_ADMIN_VIEW_ONLY = "proxy_admin_viewer"
# Internal User Roles
INTERNAL_USER = "internal_user"
INTERNAL_USER_VIEW_ONLY = "internal_user_viewer"
# Team Roles
TEAM = "team"
# Customer Roles - External users of proxy
CUSTOMER = "customer"
def __str__(self):
return str(self.value)
@property
def description(self):
"""
Descriptions for the enum values
"""
descriptions = {
"proxy_admin": "admin over litellm proxy, has all permissions",
"proxy_admin_viewer": "view all keys, view all spend",
"internal_user": "view/create/delete their own keys, view their own spend",
"internal_user_viewer": "view their own keys, view their own spend",
"team": "team scope used for JWT auth",
"customer": "customer",
}
return descriptions.get(self.value, "")
@property
def ui_label(self):
"""
UI labels for the enum values
"""
ui_labels = {
"proxy_admin": "Admin (All Permissions)",
"proxy_admin_viewer": "Admin (View Only)",
"internal_user": "Internal User (Create/Delete/View)",
"internal_user_viewer": "Internal User (View Only)",
"team": "Team",
"customer": "Customer",
}
return ui_labels.get(self.value, "")
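A small sketch of how the str-valued enum above is consumed; the import path mirrors the `litellm.proxy._types` imports used elsewhere in this diff:

```python
from litellm.proxy._types import LitellmUserRoles

# The enum mixes in `str`, so members compare equal to their raw role strings.
role = LitellmUserRoles.PROXY_ADMIN
assert role == "proxy_admin"

print(role.ui_label)                               # "Admin (All Permissions)"
print(LitellmUserRoles.INTERNAL_USER.description)  # "view/create/delete their own keys, view their own spend"
```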
AlertType = Literal[
"llm_exceptions",
"llm_too_slow",
@ -50,8 +119,7 @@ class LiteLLMBase(BaseModel):
# if using pydantic v1
return self.__fields_set__
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class LiteLLM_UpperboundKeyGenerateParams(LiteLLMBase):
@ -99,6 +167,14 @@ class LiteLLMRoutes(enum.Enum):
# moderations
"/moderations",
"/v1/moderations",
# batches
"/v1/batches",
"/batches",
"/v1/batches{batch_id}",
"/batches{batch_id}",
# files
"/v1/files",
"/files",
# models
"/models",
"/v1/models",
@ -272,7 +348,8 @@ class LiteLLMPromptInjectionParams(LiteLLMBase):
description="Return rejected request error message as a string to the user. Default behaviour is to raise an exception.",
)
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def check_llm_api_params(cls, values):
llm_api_check = values.get("llm_api_check")
if llm_api_check is True:
@ -330,8 +407,7 @@ class ProxyChatCompletionRequest(LiteLLMBase):
deployment_id: Optional[str] = None
request_timeout: Optional[int] = None
class Config:
extra = "allow" # allow params not defined here, these fall in litellm.completion(**kwargs)
model_config = ConfigDict(extra="allow") # allow params not defined here, these fall in litellm.completion(**kwargs)
class ModelInfoDelete(LiteLLMBase):
@ -358,11 +434,10 @@ class ModelInfo(LiteLLMBase):
]
]
class Config:
extra = Extra.allow # Allow extra fields
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=(), extra="allow")
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def set_model_info(cls, values):
if values.get("id") is None:
values.update({"id": str(uuid.uuid4())})
@ -393,10 +468,10 @@ class ModelParams(LiteLLMBase):
litellm_params: dict
model_info: ModelInfo
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def set_model_info(cls, values):
if values.get("model_info") is None:
values.update({"model_info": ModelInfo()})
@ -432,8 +507,7 @@ class GenerateKeyRequest(GenerateRequestBase):
{}
) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class GenerateKeyResponse(GenerateKeyRequest):
@ -443,7 +517,8 @@ class GenerateKeyResponse(GenerateKeyRequest):
user_id: Optional[str] = None
token_id: Optional[str] = None
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def set_model_info(cls, values):
if values.get("token") is not None:
values.update({"key": values.get("token")})
@ -483,14 +558,22 @@ class LiteLLM_ModelTable(LiteLLMBase):
created_by: str
updated_by: str
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class NewUserRequest(GenerateKeyRequest):
max_budget: Optional[float] = None
user_email: Optional[str] = None
user_role: Optional[str] = None
user_role: Optional[
Literal[
LitellmUserRoles.PROXY_ADMIN,
LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY,
LitellmUserRoles.INTERNAL_USER,
LitellmUserRoles.INTERNAL_USER_VIEW_ONLY,
LitellmUserRoles.TEAM,
LitellmUserRoles.CUSTOMER,
]
] = None
teams: Optional[list] = None
organization_id: Optional[str] = None
auto_create_key: bool = (
@ -509,10 +592,20 @@ class UpdateUserRequest(GenerateRequestBase):
user_email: Optional[str] = None
spend: Optional[float] = None
metadata: Optional[dict] = None
user_role: Optional[str] = None
user_role: Optional[
Literal[
LitellmUserRoles.PROXY_ADMIN,
LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY,
LitellmUserRoles.INTERNAL_USER,
LitellmUserRoles.INTERNAL_USER_VIEW_ONLY,
LitellmUserRoles.TEAM,
LitellmUserRoles.CUSTOMER,
]
] = None
max_budget: Optional[float] = None
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def check_user_info(cls, values):
if values.get("user_id") is None and values.get("user_email") is None:
raise ValueError("Either user id or user email must be provided")
@ -536,7 +629,8 @@ class NewCustomerRequest(LiteLLMBase):
None # if no equivalent model in allowed region - default all requests to this model
)
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def check_user_info(cls, values):
if values.get("max_budget") is not None and values.get("budget_id") is not None:
raise ValueError("Set either 'max_budget' or 'budget_id', not both.")
@ -576,7 +670,8 @@ class Member(LiteLLMBase):
user_id: Optional[str] = None
user_email: Optional[str] = None
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def check_user_info(cls, values):
if values.get("user_id") is None and values.get("user_email") is None:
raise ValueError("Either user id or user email must be provided")
@ -605,8 +700,7 @@ class TeamBase(LiteLLMBase):
class NewTeamRequest(TeamBase):
model_aliases: Optional[dict] = None
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class GlobalEndUsersSpend(LiteLLMBase):
@ -626,7 +720,8 @@ class TeamMemberDeleteRequest(LiteLLMBase):
user_id: Optional[str] = None
user_email: Optional[str] = None
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def check_user_info(cls, values):
if values.get("user_id") is None and values.get("user_email") is None:
raise ValueError("Either user id or user email must be provided")
@ -692,10 +787,10 @@ class LiteLLM_TeamTable(TeamBase):
budget_reset_at: Optional[datetime] = None
model_id: Optional[int] = None
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def set_model_info(cls, values):
dict_fields = [
"metadata",
@ -731,8 +826,7 @@ class LiteLLM_BudgetTable(LiteLLMBase):
model_max_budget: Optional[dict] = None
budget_duration: Optional[str] = None
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class LiteLLM_TeamMemberTable(LiteLLM_BudgetTable):
@ -745,8 +839,7 @@ class LiteLLM_TeamMemberTable(LiteLLM_BudgetTable):
team_id: Optional[str] = None
budget_id: Optional[str] = None
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class NewOrganizationRequest(LiteLLM_BudgetTable):
@ -825,8 +918,7 @@ class KeyManagementSettings(LiteLLMBase):
class TeamDefaultSettings(LiteLLMBase):
team_id: str
class Config:
extra = "allow" # allow params not defined here, these fall in litellm.completion(**kwargs)
model_config = ConfigDict(extra="allow") # allow params not defined here, these fall in litellm.completion(**kwargs)
class DynamoDBArgs(LiteLLMBase):
@ -988,8 +1080,7 @@ class ConfigYAML(LiteLLMBase):
description="litellm router object settings. See router.py __init__ for all, example router.num_retries=5, router.timeout=5, router.max_retries=5, router.retry_after=5",
)
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class LiteLLM_VerificationToken(LiteLLMBase):
@ -1019,9 +1110,7 @@ class LiteLLM_VerificationToken(LiteLLMBase):
org_id: Optional[str] = None # org id for a given key
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken):
"""
@ -1043,6 +1132,7 @@ class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken):
end_user_id: Optional[str] = None
end_user_tpm_limit: Optional[int] = None
end_user_rpm_limit: Optional[int] = None
end_user_max_budget: Optional[float] = None
class UserAPIKeyAuth(
@ -1053,10 +1143,20 @@ class UserAPIKeyAuth(
"""
api_key: Optional[str] = None
user_role: Optional[Literal["proxy_admin", "app_owner", "app_user"]] = None
user_role: Optional[
Literal[
LitellmUserRoles.PROXY_ADMIN,
LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY,
LitellmUserRoles.INTERNAL_USER,
LitellmUserRoles.INTERNAL_USER_VIEW_ONLY,
LitellmUserRoles.TEAM,
LitellmUserRoles.CUSTOMER,
]
] = None
allowed_model_region: Optional[Literal["eu"]] = None
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def check_api_key(cls, values):
if values.get("api_key") is not None:
values.update({"token": hash_token(values.get("api_key"))})
@ -1083,7 +1183,8 @@ class LiteLLM_UserTable(LiteLLMBase):
tpm_limit: Optional[int] = None
rpm_limit: Optional[int] = None
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def set_model_info(cls, values):
if values.get("spend") is None:
values.update({"spend": 0.0})
@ -1091,8 +1192,7 @@ class LiteLLM_UserTable(LiteLLMBase):
values.update({"models": []})
return values
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class LiteLLM_EndUserTable(LiteLLMBase):
@ -1104,14 +1204,14 @@ class LiteLLM_EndUserTable(LiteLLMBase):
default_model: Optional[str] = None
litellm_budget_table: Optional[LiteLLM_BudgetTable] = None
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def set_model_info(cls, values):
if values.get("spend") is None:
values.update({"spend": 0.0})
return values
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class LiteLLM_SpendLogs(LiteLLMBase):
@ -1170,6 +1270,7 @@ class CallInfo(LiteLLMBase):
spend: float
max_budget: Optional[float] = None
token: str = Field(description="Hashed value of that key")
customer_id: Optional[str] = None
user_id: Optional[str] = None
team_id: Optional[str] = None
user_email: Optional[str] = None
@ -1180,9 +1281,13 @@ class CallInfo(LiteLLMBase):
class WebhookEvent(CallInfo):
event: Literal[
"budget_crossed", "threshold_crossed", "projected_limit_exceeded", "key_created"
"budget_crossed",
"threshold_crossed",
"projected_limit_exceeded",
"key_created",
"spend_tracked",
]
event_group: Literal["user", "key", "team", "proxy"]
event_group: Literal["internal_user", "key", "team", "proxy", "customer"]
event_message: str # human-readable description of event
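A sketch of the `spend_tracked` / `customer` combination these new literals allow; field values are placeholders, and the remaining `CallInfo` fields are assumed to stay optional:

```python
from litellm.proxy._types import WebhookEvent

# Hypothetical payload exercising the newly added literals; values are placeholders.
event = WebhookEvent(
    event="spend_tracked",
    event_group="customer",
    event_message="Customer spend tracked",
    spend=0.42,
    token="hashed-key-value",
    customer_id="cust-123",
)
print(event.model_dump_json())
```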
@ -1215,6 +1320,7 @@ class InvitationModel(LiteLLMBase):
updated_at: datetime
updated_by: str
class ConfigFieldInfo(LiteLLMBase):
field_name: str
field_value: Any

View file

@ -15,6 +15,7 @@ from litellm.proxy._types import (
LiteLLM_TeamTable,
LiteLLMRoutes,
LiteLLM_OrganizationTable,
LitellmUserRoles,
)
from typing import Optional, Literal, Union
from litellm.proxy.utils import PrismaClient
@ -133,7 +134,11 @@ def _allowed_routes_check(user_route: str, allowed_routes: list) -> bool:
def allowed_routes_check(
user_role: Literal["proxy_admin", "team", "user"],
user_role: Literal[
LitellmUserRoles.PROXY_ADMIN,
LitellmUserRoles.TEAM,
LitellmUserRoles.INTERNAL_USER,
],
user_route: str,
litellm_proxy_roles: LiteLLM_JWTAuth,
) -> bool:
@ -141,14 +146,14 @@ def allowed_routes_check(
Check if user -> not admin - allowed to access these routes
"""
if user_role == "proxy_admin":
if user_role == LitellmUserRoles.PROXY_ADMIN:
is_allowed = _allowed_routes_check(
user_route=user_route,
allowed_routes=litellm_proxy_roles.admin_allowed_routes,
)
return is_allowed
elif user_role == "team":
elif user_role == LitellmUserRoles.TEAM:
if litellm_proxy_roles.team_allowed_routes is None:
"""
By default allow a team to call openai + info routes
@ -193,13 +198,27 @@ async def get_end_user_object(
if end_user_id is None:
return None
_key = "end_user_id:{}".format(end_user_id)
def check_in_budget(end_user_obj: LiteLLM_EndUserTable):
if end_user_obj.litellm_budget_table is None:
return
end_user_budget = end_user_obj.litellm_budget_table.max_budget
if end_user_budget is not None and end_user_obj.spend > end_user_budget:
raise litellm.BudgetExceededError(
current_cost=end_user_obj.spend, max_budget=end_user_budget
)
# check if in cache
cached_user_obj = await user_api_key_cache.async_get_cache(key=_key)
if cached_user_obj is not None:
if isinstance(cached_user_obj, dict):
return LiteLLM_EndUserTable(**cached_user_obj)
return_obj = LiteLLM_EndUserTable(**cached_user_obj)
check_in_budget(end_user_obj=return_obj)
return return_obj
elif isinstance(cached_user_obj, LiteLLM_EndUserTable):
return cached_user_obj
return_obj = cached_user_obj
check_in_budget(end_user_obj=return_obj)
return return_obj
# else, check db
try:
response = await prisma_client.db.litellm_endusertable.find_unique(
@ -217,8 +236,12 @@ async def get_end_user_object(
_response = LiteLLM_EndUserTable(**response.dict())
check_in_budget(end_user_obj=_response)
return _response
except Exception as e: # if end-user not in db
if isinstance(e, litellm.BudgetExceededError):
raise e
return None
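Restating the `check_in_budget` guard as a standalone snippet, with illustrative numbers:

```python
import litellm

# Standalone restatement of the check_in_budget guard above (illustrative values).
spend, end_user_budget = 12.5, 10.0
if end_user_budget is not None and spend > end_user_budget:
    raise litellm.BudgetExceededError(current_cost=spend, max_budget=end_user_budget)
```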

View file

@ -429,6 +429,19 @@ def run_server(
proxy_config = ProxyConfig()
_config = asyncio.run(proxy_config.get_config(config_file_path=config))
### LITELLM SETTINGS ###
litellm_settings = _config.get("litellm_settings", None)
if (
litellm_settings is not None
and "json_logs" in litellm_settings
and litellm_settings["json_logs"] == True
):
import litellm
litellm.json_logs = True
litellm._turn_on_json()
### GENERAL SETTINGS ###
general_settings = _config.get("general_settings", {})
if general_settings is None:
general_settings = {}

File diff suppressed because it is too large Load diff

View file

@ -15,6 +15,7 @@ from litellm.proxy._types import (
WebhookEvent,
AlertType,
ResetTeamBudgetRequest,
LitellmUserRoles,
)
from litellm.caching import DualCache, RedisCache
from litellm.router import Deployment, ModelInfo, LiteLLM_Params
@ -2637,7 +2638,7 @@ def _is_user_proxy_admin(user_id_information: Optional[list]):
_user = user_id_information[0]
if (
_user.get("user_role", None) is not None
and _user.get("user_role") == "proxy_admin"
and _user.get("user_role") == LitellmUserRoles.PROXY_ADMIN.value
):
return True
@ -2650,7 +2651,7 @@ def _is_user_proxy_admin(user_id_information: Optional[list]):
if (
_user.get("user_role", None) is not None
and _user.get("user_role") == "proxy_admin"
and _user.get("user_role") == LitellmUserRoles.PROXY_ADMIN.value
):
return True

View file

@ -103,7 +103,9 @@ class Router:
allowed_fails: Optional[
int
] = None, # Number of times a deployment can fail before being added to cooldown
cooldown_time: float = 1, # (seconds) time to cooldown a deployment after failure
cooldown_time: Optional[
float
] = None, # (seconds) time to cooldown a deployment after failure
routing_strategy: Literal[
"simple-shuffle",
"least-busy",
@ -248,7 +250,7 @@ class Router:
) # initialize an empty list - to allow _add_deployment and delete_deployment to work
self.allowed_fails = allowed_fails or litellm.allowed_fails
self.cooldown_time = cooldown_time or 1
self.cooldown_time = cooldown_time or 60
self.failed_calls = (
InMemoryCache()
) # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown
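Since the fallback cooldown now lands at 60 seconds, callers that depended on the old 1-second behaviour can pin it explicitly; a sketch with a placeholder deployment:

```python
from litellm import Router

# Hypothetical router config pinning the cooldown window (placeholder deployment).
router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "os.environ/OPENAI_API_KEY"},
        }
    ],
    cooldown_time=1,  # seconds; left unset, this now falls back to 60
)
```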
@ -356,7 +358,8 @@ class Router:
raise ValueError(f"Item '{fallback_dict}' is not a dictionary.")
if len(fallback_dict) != 1:
raise ValueError(
f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys.")
f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys."
)
def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
if routing_strategy == "least-busy":
@ -662,12 +665,40 @@ class Router:
raise e
async def abatch_completion(
self, models: List[str], messages: List[Dict[str, str]], **kwargs
self,
models: List[str],
messages: Union[List[Dict[str, str]], List[List[Dict[str, str]]]],
**kwargs,
):
"""
Async Batch Completion - Batch Process 1 request to multiple model_group on litellm.Router
Use this for sending the same request to N models
Async Batch Completion. Used for 2 scenarios:
1. Batch Process 1 request to N models on litellm.Router. Pass messages as List[Dict[str, str]] to use this
2. Batch Process N requests to M models on litellm.Router. Pass messages as List[List[Dict[str, str]]] to use this
Example Request for 1 request to N models:
```
response = await router.abatch_completion(
models=["gpt-3.5-turbo", "groq-llama"],
messages=[
{"role": "user", "content": "is litellm becoming a better product ?"}
],
max_tokens=15,
)
```
Example Request for N requests to M models:
```
response = await router.abatch_completion(
models=["gpt-3.5-turbo", "groq-llama"],
messages=[
[{"role": "user", "content": "is litellm becoming a better product ?"}],
[{"role": "user", "content": "who is this"}],
],
)
```
"""
############## Helpers for async completion ##################
async def _async_completion_no_exceptions(
model: str, messages: List[Dict[str, str]], **kwargs
@ -680,17 +711,50 @@ class Router:
except Exception as e:
return e
_tasks = []
for model in models:
# add each task; exceptions are caught and returned as results
_tasks.append(
_async_completion_no_exceptions(
model=model, messages=messages, **kwargs
async def _async_completion_no_exceptions_return_idx(
model: str,
messages: List[Dict[str, str]],
idx: int, # index of message this response corresponds to
**kwargs,
):
"""
Wrapper around self.acompletion that catches exceptions and returns them as a result
"""
try:
return (
await self.acompletion(model=model, messages=messages, **kwargs),
idx,
)
)
except Exception as e:
return e, idx
response = await asyncio.gather(*_tasks)
return response
############## Helpers for async completion ##################
if isinstance(messages, list) and all(isinstance(m, dict) for m in messages):
_tasks = []
for model in models:
# add each task; exceptions are caught and returned as results
_tasks.append(_async_completion_no_exceptions(model=model, messages=messages, **kwargs)) # type: ignore
response = await asyncio.gather(*_tasks)
return response
elif isinstance(messages, list) and all(isinstance(m, list) for m in messages):
_tasks = []
for idx, message in enumerate(messages):
for model in models:
# Request Number X, Model Number Y
_tasks.append(
_async_completion_no_exceptions_return_idx(
model=model, idx=idx, messages=message, **kwargs # type: ignore
)
)
responses = await asyncio.gather(*_tasks)
final_responses: List[List[Any]] = [[] for _ in range(len(messages))]
for response in responses:
if isinstance(response, tuple):
final_responses[response[1]].append(response[0])
else:
final_responses[0].append(response)
return final_responses
async def abatch_completion_one_model_multiple_requests(
self, model: str, messages: List[List[Dict[str, str]]], **kwargs
@ -737,6 +801,101 @@ class Router:
response = await asyncio.gather(*_tasks)
return response
# fmt: off
@overload
async def abatch_completion_fastest_response(
self, model: str, messages: List[Dict[str, str]], stream: Literal[True], **kwargs
) -> CustomStreamWrapper:
...
@overload
async def abatch_completion_fastest_response(
self, model: str, messages: List[Dict[str, str]], stream: Literal[False] = False, **kwargs
) -> ModelResponse:
...
# fmt: on
async def abatch_completion_fastest_response(
self,
model: str,
messages: List[Dict[str, str]],
stream: bool = False,
**kwargs,
):
"""
model - comma-separated string of model names, e.g. model="gpt-4, gpt-3.5-turbo"
Returns fastest response from list of model names. OpenAI-compatible endpoint.
"""
models = [m.strip() for m in model.split(",")]
async def _async_completion_no_exceptions(
model: str, messages: List[Dict[str, str]], stream: bool, **kwargs: Any
) -> Union[ModelResponse, CustomStreamWrapper, Exception]:
"""
Wrapper around self.acompletion that catches exceptions and returns them as a result
"""
try:
return await self.acompletion(model=model, messages=messages, stream=stream, **kwargs) # type: ignore
except asyncio.CancelledError:
verbose_router_logger.debug(
"Received 'task.cancel'. Cancelling call w/ model={}.".format(model)
)
raise
except Exception as e:
return e
pending_tasks = [] # type: ignore
async def check_response(task: asyncio.Task):
nonlocal pending_tasks
try:
result = await task
if isinstance(result, (ModelResponse, CustomStreamWrapper)):
verbose_router_logger.debug(
"Received successful response. Cancelling other LLM API calls."
)
# If a desired response is received, cancel all other pending tasks
for t in pending_tasks:
t.cancel()
return result
except Exception:
# Ignore exceptions, let the loop handle them
pass
finally:
# Remove the task from pending tasks if it finishes
try:
pending_tasks.remove(task)
except KeyError:
pass
for model in models:
task = asyncio.create_task(
_async_completion_no_exceptions(
model=model, messages=messages, stream=stream, **kwargs
)
)
pending_tasks.append(task)
# Await the first task to complete successfully
while pending_tasks:
done, pending_tasks = await asyncio.wait( # type: ignore
pending_tasks, return_when=asyncio.FIRST_COMPLETED
)
for completed_task in done:
result = await check_response(completed_task)
if result is not None:
# Return the first successful result
result._hidden_params["fastest_response_batch_completion"] = True
return result
# If we exit the loop without returning, all tasks failed
raise Exception("All tasks failed")
def image_generation(self, prompt: str, model: str, **kwargs):
try:
kwargs["model"] = model
@ -1045,6 +1204,84 @@ class Router:
self.fail_calls[model_name] += 1
raise e
async def aspeech(self, model: str, input: str, voice: str, **kwargs):
"""
Example Usage:
```
from litellm import Router
client = Router(model_list = [
{
"model_name": "tts",
"litellm_params": {
"model": "tts-1",
},
},
])
response = await client.aspeech(
model="tts",
voice="alloy",
input="the quick brown fox jumped over the lazy dogs",
api_base=None,
api_key=None,
organization=None,
project=None,
max_retries=1,
timeout=600,
client=None,
optional_params={},
)
response.stream_to_file(speech_file_path)
```
"""
try:
kwargs["input"] = input
kwargs["voice"] = voice
deployment = await self.async_get_available_deployment(
model=model,
messages=[{"role": "user", "content": "prompt"}],
specific_deployment=kwargs.pop("specific_deployment", None),
)
kwargs.setdefault("metadata", {}).update(
{
"deployment": deployment["litellm_params"]["model"],
"model_info": deployment.get("model_info", {}),
}
)
kwargs["model_info"] = deployment.get("model_info", {})
data = deployment["litellm_params"].copy()
model_name = data["model"]
for k, v in self.default_litellm_params.items():
if (
k not in kwargs
): # prioritize model-specific params > default router params
kwargs[k] = v
elif k == "metadata":
kwargs[k].update(v)
potential_model_client = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="async"
)
# check if provided keys == client keys #
dynamic_api_key = kwargs.get("api_key", None)
if (
dynamic_api_key is not None
and potential_model_client is not None
and dynamic_api_key != potential_model_client.api_key
):
model_client = None
else:
model_client = potential_model_client
response = await litellm.aspeech(**data, **kwargs)
return response
except Exception as e:
raise e
async def amoderation(self, model: str, input: str, **kwargs):
try:
kwargs["model"] = model
@ -1693,7 +1930,8 @@ class Router:
)
await asyncio.sleep(_timeout)
try:
original_exception.message += f"\nNumber Retries = {current_attempt}"
cooldown_deployments = await self._async_get_cooldown_deployments()
original_exception.message += f"\nNumber Retries = {current_attempt + 1}, Max Retries={num_retries}\nCooldown Deployments={cooldown_deployments}"
except:
pass
raise original_exception
@ -1986,7 +2224,7 @@ class Router:
)
)
if _time_to_cooldown < 0:
if _time_to_cooldown is None or _time_to_cooldown < 0:
# if the response headers did not read it -> set to default cooldown time
_time_to_cooldown = self.cooldown_time
@ -2082,6 +2320,9 @@ class Router:
elif exception_status == 408:
return True
elif exception_status == 404:
return True
else:
# Do NOT cool down all other 4XX Errors
return False
@ -2107,6 +2348,7 @@ class Router:
the exception is not one that should be immediately retried (e.g. 401)
"""
args = locals()
if deployment is None:
return
@ -2139,7 +2381,6 @@ class Router:
)
exception_status = 500
_should_retry = litellm._should_retry(status_code=exception_status)
if updated_fails > self.allowed_fails or _should_retry == False:
# get the current cooldown list for that minute
cooldown_key = f"{current_minute}:cooldown_models" # group cooldown models by minute to reduce number of redis calls
@ -2453,8 +2694,17 @@ class Router:
if "azure" in model_name:
if api_base is None or not isinstance(api_base, str):
filtered_litellm_params = {
k: v
for k, v in model["litellm_params"].items()
if k != "api_key"
}
_filtered_model = {
"model_name": model["model_name"],
"litellm_params": filtered_litellm_params,
}
raise ValueError(
f"api_base is required for Azure OpenAI. Set it on your config. Model - {model}"
f"api_base is required for Azure OpenAI. Set it on your config. Model - {_filtered_model}"
)
azure_ad_token = litellm_params.get("azure_ad_token")
if azure_ad_token is not None:
@ -3076,6 +3326,8 @@ class Router:
supported_openai_params = litellm.get_supported_openai_params(
model=model, custom_llm_provider=llm_provider
)
if supported_openai_params is None:
supported_openai_params = []
model_info = ModelMapInfo(
max_tokens=None,
max_input_tokens=None,
@ -3546,7 +3798,6 @@ class Router:
## get healthy deployments
### get all deployments
healthy_deployments = [m for m in self.model_list if m["model_name"] == model]
if len(healthy_deployments) == 0:
# check if the user sent in a deployment name instead
healthy_deployments = [

View file

@ -0,0 +1,2 @@
{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-3.5-turbo-0125", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 10}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-3.5-turbo-0125", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 10}}

View file

@ -14,7 +14,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
import pytest, logging, asyncio
import litellm, asyncio
from litellm.proxy.proxy_server import add_new_model, update_model
from litellm.proxy.proxy_server import add_new_model, update_model, LitellmUserRoles
from litellm._logging import verbose_proxy_logger
from litellm.proxy.utils import PrismaClient, ProxyLogging
@ -90,7 +90,9 @@ async def test_add_new_model(prisma_client):
),
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN.value,
api_key="sk-1234",
user_id="1234",
),
)
@ -137,7 +139,9 @@ async def test_add_update_model(prisma_client):
),
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN.value,
api_key="sk-1234",
user_id="1234",
),
)
@ -166,7 +170,9 @@ async def test_add_update_model(prisma_client):
),
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN.value,
api_key="sk-1234",
user_id="1234",
),
)

View file

@ -499,6 +499,36 @@ async def test_webhook_alerting(alerting_type):
mock_send_alert.assert_awaited_once()
# @pytest.mark.asyncio
# async def test_webhook_customer_spend_event():
# """
# Test if customer spend is working as expected
# """
# slack_alerting = SlackAlerting(alerting=["webhook"])
# with patch.object(
# slack_alerting, "send_webhook_alert", new=AsyncMock()
# ) as mock_send_alert:
# user_info = {
# "token": "50e55ca5bfbd0759697538e8d23c0cd5031f52d9e19e176d7233b20c7c4d3403",
# "spend": 1,
# "max_budget": 0,
# "user_id": "ishaan@berri.ai",
# "user_email": "ishaan@berri.ai",
# "key_alias": "my-test-key",
# "projected_exceeded_date": "10/20/2024",
# "projected_spend": 200,
# }
# user_info = CallInfo(**user_info)
# for _ in range(50):
# await slack_alerting.budget_alerts(
# type=alerting_type,
# user_info=user_info,
# )
# mock_send_alert.assert_awaited_once()
@pytest.mark.parametrize(
"model, api_base, llm_provider, vertex_project, vertex_location",
[

View file

@ -0,0 +1,96 @@
# What is this?
## unit tests for openai tts endpoint
import sys, os, asyncio, time, random, uuid
import traceback
from dotenv import load_dotenv
load_dotenv()
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm, openai
from pathlib import Path
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_audio_speech_litellm(sync_mode):
speech_file_path = Path(__file__).parent / "speech.mp3"
if sync_mode:
response = litellm.speech(
model="openai/tts-1",
voice="alloy",
input="the quick brown fox jumped over the lazy dogs",
api_base=None,
api_key=None,
organization=None,
project=None,
max_retries=1,
timeout=600,
client=None,
optional_params={},
)
from litellm.llms.openai import HttpxBinaryResponseContent
assert isinstance(response, HttpxBinaryResponseContent)
else:
response = await litellm.aspeech(
model="openai/tts-1",
voice="alloy",
input="the quick brown fox jumped over the lazy dogs",
api_base=None,
api_key=None,
organization=None,
project=None,
max_retries=1,
timeout=600,
client=None,
optional_params={},
)
from litellm.llms.openai import HttpxBinaryResponseContent
assert isinstance(response, HttpxBinaryResponseContent)
@pytest.mark.parametrize("mode", ["iterator"]) # "file",
@pytest.mark.asyncio
async def test_audio_speech_router(mode):
speech_file_path = Path(__file__).parent / "speech.mp3"
from litellm import Router
client = Router(
model_list=[
{
"model_name": "tts",
"litellm_params": {
"model": "openai/tts-1",
},
},
]
)
response = await client.aspeech(
model="tts",
voice="alloy",
input="the quick brown fox jumped over the lazy dogs",
api_base=None,
api_key=None,
organization=None,
project=None,
max_retries=1,
timeout=600,
client=None,
optional_params={},
)
from litellm.llms.openai import HttpxBinaryResponseContent
assert isinstance(response, HttpxBinaryResponseContent)

View file

@ -0,0 +1,62 @@
# What is this?
## Tests if 'get_end_user_object' works as expected
import sys, os, asyncio, time, random, uuid
import traceback
from dotenv import load_dotenv
load_dotenv()
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest, litellm
from litellm.proxy.auth.auth_checks import get_end_user_object
from litellm.caching import DualCache
from litellm.proxy._types import LiteLLM_EndUserTable, LiteLLM_BudgetTable
from litellm.proxy.utils import PrismaClient
@pytest.mark.parametrize("customer_spend, customer_budget", [(0, 10), (10, 0)])
@pytest.mark.asyncio
async def test_get_end_user_object(customer_spend, customer_budget):
"""
Scenario 1: normal
Scenario 2: user over budget
"""
end_user_id = "my-test-customer"
_budget = LiteLLM_BudgetTable(max_budget=customer_budget)
end_user_obj = LiteLLM_EndUserTable(
user_id=end_user_id,
spend=customer_spend,
litellm_budget_table=_budget,
blocked=False,
)
_cache = DualCache()
_key = "end_user_id:{}".format(end_user_id)
_cache.set_cache(key=_key, value=end_user_obj)
try:
await get_end_user_object(
end_user_id=end_user_id,
prisma_client="RANDOM VALUE", # type: ignore
user_api_key_cache=_cache,
)
if customer_spend > customer_budget:
pytest.fail(
"Expected call to fail. Customer Spend={}, Customer Budget={}".format(
customer_spend, customer_budget
)
)
except Exception as e:
if (
isinstance(e, litellm.BudgetExceededError)
and customer_spend > customer_budget
):
pass
else:
pytest.fail(
"Expected call to work. Customer Spend={}, Customer Budget={}, Error={}".format(
customer_spend, customer_budget, str(e)
)
)

View file

@ -7,7 +7,7 @@ import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent-directory to the system path
) # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion, completion_cost, Timeout
@ -38,7 +38,7 @@ def reset_callbacks():
@pytest.mark.skip(reason="Local test")
def test_response_model_none():
"""
Addresses: https://github.com/BerriAI/litellm/issues/2972
Addresses:https://github.com/BerriAI/litellm/issues/2972
"""
x = completion(
model="mymodel",
@ -1397,6 +1397,81 @@ def test_hf_classifier_task():
pytest.fail(f"Error occurred: {str(e)}")
def test_ollama_image():
"""
Test that datauri prefixes are removed, JPEG/PNG images are passed
through, and other image formats are converted to JPEG. Non-image
data is untouched.
"""
import io, base64
from PIL import Image
def mock_post(url, **kwargs):
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.headers = {"Content-Type": "application/json"}
mock_response.json.return_value = {
# return the image in the response so that it can be tested
# against the original
"response": kwargs["json"]["images"]
}
return mock_response
def make_b64image(format):
image = Image.new(mode="RGB", size=(1, 1))
image_buffer = io.BytesIO()
image.save(image_buffer, format)
return base64.b64encode(image_buffer.getvalue()).decode("utf-8")
jpeg_image = make_b64image("JPEG")
webp_image = make_b64image("WEBP")
png_image = make_b64image("PNG")
base64_data = base64.b64encode(b"some random data").decode("utf-8")
datauri_base64_data = f"data:text/plain;base64,{base64_data}"
tests = [
# input expected
[jpeg_image, jpeg_image],
[webp_image, None],
[png_image, png_image],
[f"data:image/jpeg;base64,{jpeg_image}", jpeg_image],
[f"data:image/webp;base64,{webp_image}", None],
[f"data:image/png;base64,{png_image}", png_image],
[datauri_base64_data, datauri_base64_data],
]
for test in tests:
try:
with patch("requests.post", side_effect=mock_post):
response = completion(
model="ollama/llava",
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "Whats in this image?"},
{
"type": "image_url",
"image_url": {"url": test[0]},
},
],
}
],
)
if not test[1]:
# the conversion process may not always generate the same image,
# so just check for a JPEG image when a conversion was done.
image_data = response["choices"][0]["message"]["content"][0]
image = Image.open(io.BytesIO(base64.b64decode(image_data)))
assert image.format == "JPEG"
else:
assert response["choices"][0]["message"]["content"][0] == test[1]
except Exception as e:
pytest.fail(f"Error occurred: {e}")
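Not litellm's implementation — a standalone sketch of the normalization behavior this test asserts, using a hypothetical `normalize_image_payload` helper:

```python
import base64
import io

from PIL import Image


def normalize_image_payload(value: str) -> str:
    """Strip image data-URI prefixes, pass JPEG/PNG through, convert other
    image formats to JPEG, and leave non-image data untouched."""
    if value.startswith("data:"):
        if not value.startswith("data:image/"):
            return value  # non-image data URI -> untouched
        value = value.split(",", 1)[1]  # drop the data-URI prefix
    try:
        image = Image.open(io.BytesIO(base64.b64decode(value)))
    except Exception:
        return value  # not decodable as an image -> untouched
    if image.format in ("JPEG", "PNG"):
        return value
    buffer = io.BytesIO()
    image.convert("RGB").save(buffer, "JPEG")  # re-encode e.g. WEBP as JPEG
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
```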
########################### End of Hugging Face Tests ##############################################
# def test_completion_hf_api():
# # failing on circle-ci commenting out

View file

@ -13,7 +13,7 @@ sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest, litellm
from pydantic import BaseModel
from pydantic import BaseModel, ConfigDict
from litellm.proxy.proxy_server import ProxyConfig
from litellm.proxy.utils import encrypt_value, ProxyLogging, DualCache
from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo
@ -26,8 +26,7 @@ class DBModel(BaseModel):
model_info: dict
litellm_params: dict
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
@pytest.mark.asyncio

View file

@ -61,6 +61,7 @@ from litellm.proxy.proxy_server import (
audio_transcriptions,
moderations,
model_list,
LitellmUserRoles,
)
from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend
from litellm._logging import verbose_proxy_logger
@ -137,7 +138,9 @@ async def test_new_user_response(prisma_client):
team_id=_team_id,
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234",
user_id="1234",
),
)
@ -206,7 +209,7 @@ def test_generate_and_call_with_valid_key(prisma_client, api_route):
await litellm.proxy.proxy_server.prisma_client.connect()
from litellm.proxy.proxy_server import user_api_key_cache
request = NewUserRequest(user_role="app_owner")
request = NewUserRequest(user_role=LitellmUserRoles.INTERNAL_USER)
key = await new_user(request)
print(key)
user_id = key.user_id
@ -215,7 +218,7 @@ def test_generate_and_call_with_valid_key(prisma_client, api_route):
new_user_info = await user_info(user_id=user_id)
new_user_info = new_user_info["user_info"]
print("new_user_info=", new_user_info)
assert new_user_info.user_role == "app_owner"
assert new_user_info.user_role == LitellmUserRoles.INTERNAL_USER
assert new_user_info.user_id == user_id
generated_key = key.key
@ -363,7 +366,8 @@ async def test_call_with_valid_model_using_all_models(prisma_client):
)
new_team_response = await new_team(
data=team_request, user_api_key_dict=UserAPIKeyAuth(user_role="proxy_admin")
data=team_request,
user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
)
print("new_team_response", new_team_response)
created_team_id = new_team_response["team_id"]
@ -559,7 +563,7 @@ def test_call_with_end_user_over_budget(prisma_client):
asyncio.run(test())
except Exception as e:
error_detail = e.message
assert "Authentication Error, ExceededBudget:" in error_detail
assert "Budget has been exceeded! Current" in error_detail
print(vars(e))
@ -922,7 +926,7 @@ def test_delete_key(prisma_client):
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token)
print(f"result: {result}")
result.user_role = "proxy_admin"
result.user_role = LitellmUserRoles.PROXY_ADMIN
# delete the key
result_delete_key = await delete_key_fn(
data=delete_key_request, user_api_key_dict=result
@ -972,7 +976,7 @@ def test_delete_key_auth(prisma_client):
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token)
print(f"result: {result}")
result.user_role = "proxy_admin"
result.user_role = LitellmUserRoles.PROXY_ADMIN
result_delete_key = await delete_key_fn(
data=delete_key_request, user_api_key_dict=result
@ -1044,7 +1048,7 @@ def test_generate_and_call_key_info(prisma_client):
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token)
print(f"result: {result}")
result.user_role = "proxy_admin"
result.user_role = LitellmUserRoles.PROXY_ADMIN
result_delete_key = await delete_key_fn(
data=delete_key_request, user_api_key_dict=result
@ -1078,7 +1082,9 @@ def test_generate_and_update_key(prisma_client):
team_id=_team_1,
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234",
user_id="1234",
),
)
@ -1088,7 +1094,9 @@ def test_generate_and_update_key(prisma_client):
team_id=_team_2,
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234",
user_id="1234",
),
)
@ -1158,7 +1166,7 @@ def test_generate_and_update_key(prisma_client):
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token)
print(f"result: {result}")
result.user_role = "proxy_admin"
result.user_role = LitellmUserRoles.PROXY_ADMIN
result_delete_key = await delete_key_fn(
data=delete_key_request, user_api_key_dict=result
@ -2038,7 +2046,9 @@ async def test_master_key_hashing(prisma_client):
await new_team(
NewTeamRequest(team_id=_team_id),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234",
user_id="1234",
),
)
@ -2076,7 +2086,7 @@ async def test_reset_spend_authentication(prisma_client):
"""
1. Test master key can access this route -> ONLY MASTER KEY SHOULD BE ABLE TO RESET SPEND
2. Test that non-master key gets rejected
3. Test that non-master key with role == "proxy_admin" or admin gets rejected
3. Test that non-master key with role == LitellmUserRoles.PROXY_ADMIN or admin gets rejected
"""
print("prisma client=", prisma_client)
@ -2121,10 +2131,10 @@ async def test_reset_spend_authentication(prisma_client):
in e.message
)
# Test 3 - Non-Master Key with role == "proxy_admin" or admin
# Test 3 - Non-Master Key with role == LitellmUserRoles.PROXY_ADMIN or admin
_response = await new_user(
data=NewUserRequest(
user_role="proxy_admin",
user_role=LitellmUserRoles.PROXY_ADMIN,
tpm_limit=20,
)
)
@ -2174,7 +2184,9 @@ async def test_create_update_team(prisma_client):
rpm_limit=20,
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234",
user_id="1234",
),
)
@ -2200,7 +2212,9 @@ async def test_create_update_team(prisma_client):
rpm_limit=30,
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234",
user_id="1234",
),
)

View file

@ -0,0 +1,161 @@
# What is this?
## Unit Tests for OpenAI Batches API
import sys, os, json
import traceback
import asyncio
from dotenv import load_dotenv
load_dotenv()
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest, logging, asyncio
import litellm
from litellm import (
create_batch,
create_file,
)
import time
def test_create_batch():
"""
1. Create File for Batch completion
2. Create Batch Request
3. Retrieve the specific batch
"""
file_name = "openai_batch_completions.jsonl"
_current_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(_current_dir, file_name)
file_obj = litellm.create_file(
file=open(file_path, "rb"),
purpose="batch",
custom_llm_provider="openai",
)
print("Response from creating file=", file_obj)
batch_input_file_id = file_obj.id
assert (
batch_input_file_id is not None
), "Failed to create file, expected a non null file_id but got {batch_input_file_id}"
create_batch_response = litellm.create_batch(
completion_window="24h",
endpoint="/v1/chat/completions",
input_file_id=batch_input_file_id,
custom_llm_provider="openai",
metadata={"key1": "value1", "key2": "value2"},
)
print("response from litellm.create_batch=", create_batch_response)
assert (
create_batch_response.id is not None
), f"Failed to create batch, expected a non null batch_id but got {create_batch_response.id}"
assert (
create_batch_response.endpoint == "/v1/chat/completions"
), f"Failed to create batch, expected endpoint to be /v1/chat/completions but got {create_batch_response.endpoint}"
assert (
create_batch_response.input_file_id == batch_input_file_id
), f"Failed to create batch, expected input_file_id to be {batch_input_file_id} but got {create_batch_response.input_file_id}"
retrieved_batch = litellm.retrieve_batch(
batch_id=create_batch_response.id, custom_llm_provider="openai"
)
print("retrieved batch=", retrieved_batch)
# just assert that we retrieved a non None batch
assert retrieved_batch.id == create_batch_response.id
file_content = litellm.file_content(
file_id=batch_input_file_id, custom_llm_provider="openai"
)
result = file_content.content
result_file_name = "batch_job_results_furniture.jsonl"
with open(result_file_name, "wb") as file:
file.write(result)
pass
@pytest.mark.asyncio()
async def test_async_create_batch():
"""
1. Create File for Batch completion
2. Create Batch Request
3. Retrieve the specific batch
"""
print("Testing async create batch")
file_name = "openai_batch_completions.jsonl"
_current_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(_current_dir, file_name)
file_obj = await litellm.acreate_file(
file=open(file_path, "rb"),
purpose="batch",
custom_llm_provider="openai",
)
print("Response from creating file=", file_obj)
batch_input_file_id = file_obj.id
assert (
batch_input_file_id is not None
), "Failed to create file, expected a non null file_id but got {batch_input_file_id}"
create_batch_response = await litellm.acreate_batch(
completion_window="24h",
endpoint="/v1/chat/completions",
input_file_id=batch_input_file_id,
custom_llm_provider="openai",
metadata={"key1": "value1", "key2": "value2"},
)
print("response from litellm.create_batch=", create_batch_response)
assert (
create_batch_response.id is not None
), f"Failed to create batch, expected a non null batch_id but got {create_batch_response.id}"
assert (
create_batch_response.endpoint == "/v1/chat/completions"
), f"Failed to create batch, expected endpoint to be /v1/chat/completions but got {create_batch_response.endpoint}"
assert (
create_batch_response.input_file_id == batch_input_file_id
), f"Failed to create batch, expected input_file_id to be {batch_input_file_id} but got {create_batch_response.input_file_id}"
await asyncio.sleep(1)
retrieved_batch = await litellm.aretrieve_batch(
batch_id=create_batch_response.id, custom_llm_provider="openai"
)
print("retrieved batch=", retrieved_batch)
# just assert that we retrieved a non None batch
assert retrieved_batch.id == create_batch_response.id
# try to get file content for our original file
file_content = await litellm.afile_content(
file_id=batch_input_file_id, custom_llm_provider="openai"
)
print("file content = ", file_content)
# # write this file content to a file
# with open("file_content.json", "w") as f:
# json.dump(file_content, f)
def test_retrieve_batch():
pass
def test_cancel_batch():
pass
def test_list_batch():
pass

View file

@ -97,6 +97,18 @@ def test_databricks_optional_params():
assert "user" not in optional_params
def test_azure_ai_mistral_optional_params():
litellm.drop_params = True
optional_params = get_optional_params(
model="mistral-large-latest",
user="John",
custom_llm_provider="openai",
max_tokens=10,
temperature=0.2,
)
assert "user" not in optional_params
def test_azure_gpt_optional_params_gpt_vision():
# for OpenAI, Azure all extra params need to get passed as extra_body to OpenAI python. We assert we actually set extra_body here
optional_params = litellm.utils.get_optional_params(

View file

@ -19,6 +19,25 @@ import os, httpx
load_dotenv()
def test_router_sensitive_keys():
try:
router = Router(
model_list=[
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"api_key": "special-key",
},
"model_info": {"id": 12345},
},
],
)
except Exception as e:
print(f"error msg - {str(e)}")
assert "special-key" not in str(e)
@pytest.mark.parametrize("num_retries", [None, 2])
@pytest.mark.parametrize("max_retries", [None, 4])
def test_router_num_retries_init(num_retries, max_retries):

View file

@ -19,8 +19,141 @@ import os, httpx
load_dotenv()
@pytest.mark.parametrize("mode", ["all_responses", "fastest_response"])
@pytest.mark.asyncio
async def test_batch_completion_multiple_models():
async def test_batch_completion_multiple_models(mode):
litellm.set_verbose = True
router = litellm.Router(
model_list=[
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo",
},
},
{
"model_name": "groq-llama",
"litellm_params": {
"model": "groq/llama3-8b-8192",
},
},
]
)
if mode == "all_responses":
response = await router.abatch_completion(
models=["gpt-3.5-turbo", "groq-llama"],
messages=[
{"role": "user", "content": "is litellm becoming a better product ?"}
],
max_tokens=15,
)
print(response)
assert len(response) == 2
models_in_responses = []
for individual_response in response:
_model = individual_response["model"]
models_in_responses.append(_model)
# assert both models are different
assert models_in_responses[0] != models_in_responses[1]
elif mode == "fastest_response":
from openai.types.chat.chat_completion import ChatCompletion
response = await router.abatch_completion_fastest_response(
model="gpt-3.5-turbo, groq-llama",
messages=[
{"role": "user", "content": "is litellm becoming a better product ?"}
],
max_tokens=15,
)
ChatCompletion.model_validate(response.model_dump(), strict=True)
@pytest.mark.asyncio
async def test_batch_completion_fastest_response_unit_test():
"""
Unit test to confirm fastest response will always return the response which arrives earliest.
2 models -> 1 is cached, the other is a real llm api call => assert cached response always returned
"""
litellm.set_verbose = True
router = litellm.Router(
model_list=[
{
"model_name": "gpt-4",
"litellm_params": {
"model": "gpt-4",
},
"model_info": {"id": "1"},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo",
"mock_response": "This is a fake response",
},
"model_info": {"id": "2"},
},
]
)
response = await router.abatch_completion_fastest_response(
model="gpt-4, gpt-3.5-turbo",
messages=[
{"role": "user", "content": "is litellm becoming a better product ?"}
],
max_tokens=500,
)
assert response._hidden_params["model_id"] == "2"
assert response.choices[0].message.content == "This is a fake response"
print(f"response: {response}")
@pytest.mark.asyncio
async def test_batch_completion_fastest_response_streaming():
litellm.set_verbose = True
router = litellm.Router(
model_list=[
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo",
},
},
{
"model_name": "groq-llama",
"litellm_params": {
"model": "groq/llama3-8b-8192",
},
},
]
)
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
response = await router.abatch_completion_fastest_response(
model="gpt-3.5-turbo, groq-llama",
messages=[
{"role": "user", "content": "is litellm becoming a better product ?"}
],
max_tokens=15,
stream=True,
)
async for chunk in response:
ChatCompletionChunk.model_validate(chunk.model_dump(), strict=True)
@pytest.mark.asyncio
async def test_batch_completion_multiple_models_multiple_messages():
litellm.set_verbose = True
router = litellm.Router(
@ -43,18 +176,21 @@ async def test_batch_completion_multiple_models():
response = await router.abatch_completion(
models=["gpt-3.5-turbo", "groq-llama"],
messages=[
{"role": "user", "content": "is litellm becoming a better product ?"}
[{"role": "user", "content": "is litellm becoming a better product ?"}],
[{"role": "user", "content": "who is this"}],
],
max_tokens=15,
)
print(response)
print("response from batches =", response)
assert len(response) == 2
assert len(response[0]) == 2
assert isinstance(response[0][0], litellm.ModelResponse)
models_in_responses = []
for individual_response in response:
_model = individual_response["model"]
models_in_responses.append(_model)
# models_in_responses = []
# for individual_response in response:
# _model = individual_response["model"]
# models_in_responses.append(_model)
# assert both models are different
assert models_in_responses[0] != models_in_responses[1]
# # assert both models are different
# assert models_in_responses[0] != models_in_responses[1]

View file

@ -3,7 +3,7 @@
import sys, os, asyncio
import traceback
import time, pytest
import time, pytest, uuid
from pydantic import BaseModel
from typing import Tuple
@ -241,203 +241,138 @@ def test_completion_azure_stream_content_filter_no_delta():
"""
try:
chunks = [
{
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{
"delta": {
"content": "",
"role": "assistant"
},
"finish_reason": None,
"index": 0
"delta": {"content": "", "role": "assistant"},
"finish_reason": None,
"index": 0,
}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a"
},
{
"system_fingerprint": "fp_5f4bad809a",
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{"delta": {"content": "This"}, "finish_reason": None, "index": 0}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a",
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{"delta": {"content": " is"}, "finish_reason": None, "index": 0}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a",
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{"delta": {"content": " a"}, "finish_reason": None, "index": 0}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a",
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{"delta": {"content": " dummy"}, "finish_reason": None, "index": 0}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a",
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{
"delta": {
"content": "This"
},
"finish_reason": None,
"index": 0
"delta": {"content": " response"},
"finish_reason": None,
"index": 0,
}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a"
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{
"delta": {
"content": " is"
},
"finish_reason": None,
"index": 0
}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a"
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{
"delta": {
"content": " a"
},
"finish_reason": None,
"index": 0
}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a"
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{
"delta": {
"content": " dummy"
},
"finish_reason": None,
"index": 0
}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a"
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{
"delta": {
"content": " response"
},
"finish_reason": None,
"index": 0
}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a"
},
{
"system_fingerprint": "fp_5f4bad809a",
},
{
"id": "",
"choices": [
{
"finish_reason": None,
"index": 0,
"content_filter_offsets": {
"check_offset": 35159,
"start_offset": 35159,
"end_offset": 36150
},
"content_filter_results": {
"hate": {
"filtered": False,
"severity": "safe"
"finish_reason": None,
"index": 0,
"content_filter_offsets": {
"check_offset": 35159,
"start_offset": 35159,
"end_offset": 36150,
},
"self_harm": {
"filtered": False,
"severity": "safe"
"content_filter_results": {
"hate": {"filtered": False, "severity": "safe"},
"self_harm": {"filtered": False, "severity": "safe"},
"sexual": {"filtered": False, "severity": "safe"},
"violence": {"filtered": False, "severity": "safe"},
},
"sexual": {
"filtered": False,
"severity": "safe"
},
"violence": {
"filtered": False,
"severity": "safe"
}
}
}
],
"created": 0,
"model": "",
"object": ""
},
{
"object": "",
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{
"delta": {
"content": "."
},
"finish_reason": None,
"index": 0
}
{"delta": {"content": "."}, "finish_reason": None, "index": 0}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a"
},
{
"system_fingerprint": "fp_5f4bad809a",
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{
"delta": {},
"finish_reason": "stop",
"index": 0
}
],
"choices": [{"delta": {}, "finish_reason": "stop", "index": 0}],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a"
},
{
"system_fingerprint": "fp_5f4bad809a",
},
{
"id": "",
"choices": [
{
"finish_reason": None,
"index": 0,
"content_filter_offsets": {
"check_offset": 36150,
"start_offset": 36060,
"end_offset": 37029
},
"content_filter_results": {
"hate": {
"filtered": False,
"severity": "safe"
"finish_reason": None,
"index": 0,
"content_filter_offsets": {
"check_offset": 36150,
"start_offset": 36060,
"end_offset": 37029,
},
"self_harm": {
"filtered": False,
"severity": "safe"
"content_filter_results": {
"hate": {"filtered": False, "severity": "safe"},
"self_harm": {"filtered": False, "severity": "safe"},
"sexual": {"filtered": False, "severity": "safe"},
"violence": {"filtered": False, "severity": "safe"},
},
"sexual": {
"filtered": False,
"severity": "safe"
},
"violence": {
"filtered": False,
"severity": "safe"
}
}
}
],
"created": 0,
"model": "",
"object": ""
}
"object": "",
},
]
chunk_list = []
@ -1449,29 +1384,68 @@ def test_bedrock_claude_3_streaming():
pytest.fail(f"Error occurred: {e}")
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_claude_3_streaming_finish_reason():
async def test_claude_3_streaming_finish_reason(sync_mode):
try:
import threading
litellm.set_verbose = True
messages = [
{"role": "system", "content": "Be helpful"},
{"role": "user", "content": "What do you know?"},
]
response: ModelResponse = await litellm.acompletion( # type: ignore
model="claude-3-opus-20240229",
messages=messages,
stream=True,
max_tokens=10,
)
complete_response = ""
# Add any assertions here to check the response
num_finish_reason = 0
async for chunk in response:
print(f"chunk: {chunk}")
if isinstance(chunk, ModelResponse):
if chunk.choices[0].finish_reason is not None:
num_finish_reason += 1
assert num_finish_reason == 1
def sync_test_streaming():
response: litellm.CustomStreamWrapper = litellm.completion(  # type: ignore
model="claude-3-opus-20240229",
messages=messages,
stream=True,
max_tokens=10,
)
complete_response = ""
# Add any assertions here to check the response
num_finish_reason = 0
for chunk in response:
print(f"chunk: {chunk}")
if isinstance(chunk, ModelResponse):
if chunk.choices[0].finish_reason is not None:
num_finish_reason += 1
assert num_finish_reason == 1
async def test_streaming():
response: litellm.CustomStreamWrapper = await litellm.acompletion( # type: ignore
model="claude-3-opus-20240229",
messages=messages,
stream=True,
max_tokens=10,
)
complete_response = ""
# Add any assertions here to check the response
num_finish_reason = 0
async for chunk in response:
print(f"chunk: {chunk}")
if isinstance(chunk, ModelResponse):
if chunk.choices[0].finish_reason is not None:
num_finish_reason += 1
assert num_finish_reason == 1
tasks = []
for _ in range(2):
if sync_mode == False:
tasks.append(test_streaming())
else:
thread = threading.Thread(target=sync_test_streaming)
thread.start()
tasks.append(thread)
if sync_mode == False:
await asyncio.gather(*tasks)
else:
# Wait for all threads to complete
for thread in tasks:
thread.join()
except RateLimitError:
pass
except Exception as e:

View file

@ -1,49 +1,35 @@
# Commented out for now - since traceloop break ci/cd
# import sys
# import os
# import io, asyncio
import sys
import os
import time
import pytest
import litellm
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
from traceloop.sdk import Traceloop
# sys.path.insert(0, os.path.abspath('../..'))
# from litellm import completion
# import litellm
# litellm.num_retries = 3
# litellm.success_callback = [""]
# import time
# import pytest
# from traceloop.sdk import Traceloop
# Traceloop.init(app_name="test-litellm", disable_batch=True)
sys.path.insert(0, os.path.abspath("../.."))
# def test_traceloop_logging():
# try:
# litellm.set_verbose = True
# response = litellm.completion(
# model="gpt-3.5-turbo",
# messages=[{"role": "user", "content":"This is a test"}],
# max_tokens=1000,
# temperature=0.7,
# timeout=5,
# )
# print(f"response: {response}")
# except Exception as e:
# pytest.fail(f"An exception occurred - {e}")
# # test_traceloop_logging()
@pytest.fixture()
def exporter():
exporter = InMemorySpanExporter()
Traceloop.init(
app_name="test_litellm",
disable_batch=True,
exporter=exporter,
)
litellm.success_callback = ["traceloop"]
litellm.set_verbose = True
return exporter
# # def test_traceloop_logging_async():
# # try:
# # litellm.set_verbose = True
# # async def test_acompletion():
# # return await litellm.acompletion(
# # model="gpt-3.5-turbo",
# # messages=[{"role": "user", "content":"This is a test"}],
# # max_tokens=1000,
# # temperature=0.7,
# # timeout=5,
# # )
# # response = asyncio.run(test_acompletion())
# # print(f"response: {response}")
# # except Exception as e:
# # pytest.fail(f"An exception occurred - {e}")
# # test_traceloop_logging_async()
@pytest.mark.parametrize("model", ["claude-instant-1.2", "gpt-3.5-turbo"])
def test_traceloop_logging(exporter, model):
litellm.completion(
model=model,
messages=[{"role": "user", "content": "This is a test"}],
max_tokens=1000,
temperature=0.7,
timeout=5,
)

View file

@ -1,6 +1,6 @@
from typing import List, Optional, Union, Iterable
from pydantic import BaseModel, validator
from pydantic import BaseModel, ConfigDict, validator
from typing_extensions import Literal, Required, TypedDict
@ -191,6 +191,4 @@ class CompletionRequest(BaseModel):
api_key: Optional[str] = None
model_list: Optional[List[str]] = None
class Config:
extra = "allow"
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=(), extra="allow")

View file

@ -1,6 +1,6 @@
from typing import List, Optional, Union
from pydantic import BaseModel, validator
from pydantic import BaseModel, ConfigDict
class EmbeddingRequest(BaseModel):
@ -18,6 +18,4 @@ class EmbeddingRequest(BaseModel):
litellm_logging_obj: Optional[dict] = None
logger_fn: Optional[str] = None
class Config:
# allow kwargs
extra = "allow"
model_config = ConfigDict(extra="allow")

View file

@ -6,9 +6,8 @@ from typing import (
Literal,
Iterable,
)
from typing_extensions import override, Required
from typing_extensions import override, Required, Dict
from pydantic import BaseModel
from openai.types.beta.threads.message_content import MessageContent
from openai.types.beta.threads.message import Message as OpenAIMessage
from openai.types.beta.thread_create_params import (
@ -18,8 +17,23 @@ from openai.types.beta.assistant_tool_param import AssistantToolParam
from openai.types.beta.threads.run import Run
from openai.types.beta.assistant import Assistant
from openai.pagination import SyncCursorPage
from os import PathLike
from openai.types import FileObject, Batch
from openai._legacy_response import HttpxBinaryResponseContent
from typing import TypedDict, List, Optional, Tuple, Mapping, IO
from typing import TypedDict, List, Optional
FileContent = Union[IO[bytes], bytes, PathLike]
FileTypes = Union[
# file (or bytes)
FileContent,
# (filename, file (or bytes))
Tuple[Optional[str], FileContent],
# (filename, file (or bytes), content_type)
Tuple[Optional[str], FileContent, Optional[str]],
# (filename, file (or bytes), content_type, headers)
Tuple[Optional[str], FileContent, Optional[str], Mapping[str, str]],
]
class NotGiven:
@ -146,3 +160,96 @@ class Thread(BaseModel):
object: Literal["thread"]
"""The object type, which is always `thread`."""
# OpenAI Files Types
class CreateFileRequest(TypedDict, total=False):
"""
CreateFileRequest
Used by Assistants API, Batches API, and Fine-Tunes API
Required Params:
file: FileTypes
purpose: Literal['assistants', 'batch', 'fine-tune']
Optional Params:
extra_headers: Optional[Dict[str, str]]
extra_body: Optional[Dict[str, str]] = None
timeout: Optional[float] = None
"""
file: FileTypes
purpose: Literal["assistants", "batch", "fine-tune"]
extra_headers: Optional[Dict[str, str]]
extra_body: Optional[Dict[str, str]]
timeout: Optional[float]
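A minimal sketch of populating this TypedDict; the file name, inline bytes, and content type are illustrative, and either `FileTypes` form defined above works for `file`:

```python
# assumes CreateFileRequest (defined above) is in scope

# open file handle (IO[bytes]) form
from_handle: CreateFileRequest = {
    "file": open("openai_batch_completions.jsonl", "rb"),
    "purpose": "batch",
}

# (filename, file bytes, content_type) tuple form
from_tuple: CreateFileRequest = {
    "file": ("batch_input.jsonl", b'{"custom_id": "request-1"}', "application/jsonl"),
    "purpose": "batch",
    "timeout": 600,
}
```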
class FileContentRequest(TypedDict, total=False):
"""
FileContentRequest
Used by Assistants API, Batches API, and Fine-Tunes API
Required Params:
file_id: str
Optional Params:
extra_headers: Optional[Dict[str, str]]
extra_body: Optional[Dict[str, str]] = None
timeout: Optional[float] = None
"""
file_id: str
extra_headers: Optional[Dict[str, str]]
extra_body: Optional[Dict[str, str]]
timeout: Optional[float]
# OpenAI Batches Types
class CreateBatchRequest(TypedDict, total=False):
"""
CreateBatchRequest
"""
completion_window: Literal["24h"]
endpoint: Literal["/v1/chat/completions", "/v1/embeddings"]
input_file_id: str
metadata: Optional[Dict[str, str]]
extra_headers: Optional[Dict[str, str]]
extra_body: Optional[Dict[str, str]]
timeout: Optional[float]
class RetrieveBatchRequest(TypedDict, total=False):
"""
RetrieveBatchRequest
"""
batch_id: str
extra_headers: Optional[Dict[str, str]]
extra_body: Optional[Dict[str, str]]
timeout: Optional[float]
class CancelBatchRequest(TypedDict, total=False):
"""
CancelBatchRequest
"""
batch_id: str
extra_headers: Optional[Dict[str, str]]
extra_body: Optional[Dict[str, str]]
timeout: Optional[float]
class ListBatchRequest(TypedDict, total=False):
"""
ListBatchRequest - List your organization's batches
Calls https://api.openai.com/v1/batches
"""
after: Union[str, NotGiven]
limit: Union[int, NotGiven]
extra_headers: Optional[Dict[str, str]]
extra_body: Optional[Dict[str, str]]
timeout: Optional[float]

View file

@ -1,12 +1,12 @@
"""
litellm.Router Types - includes RouterConfig, UpdateRouterConfig, ModelInfo etc
litellm.Router Types - includes RouterConfig, UpdateRouterConfig, ModelInfo etc
"""
from typing import List, Optional, Union, Dict, Tuple, Literal, TypedDict
import uuid
import enum
import httpx
from pydantic import BaseModel, Field
from pydantic import BaseModel, ConfigDict, Field
import datetime
from .completion import CompletionRequest
from .embedding import EmbeddingRequest
@ -18,8 +18,7 @@ class ModelConfig(BaseModel):
tpm: int
rpm: int
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class RouterConfig(BaseModel):
@ -50,8 +49,7 @@ class RouterConfig(BaseModel):
"latency-based-routing",
] = "simple-shuffle"
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class UpdateRouterConfig(BaseModel):
@ -71,17 +69,14 @@ class UpdateRouterConfig(BaseModel):
fallbacks: Optional[List[dict]] = None
context_window_fallbacks: Optional[List[dict]] = None
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class ModelInfo(BaseModel):
id: Optional[
str
] # Allow id to be optional on input, but it will always be present as a str in the model instance
db_model: bool = (
False # used for proxy - to separate models which are stored in the db vs. config.
)
db_model: bool = False # used for proxy - to separate models which are stored in the db vs. config.
updated_at: Optional[datetime.datetime] = None
updated_by: Optional[str] = None
@ -99,8 +94,7 @@ class ModelInfo(BaseModel):
id = str(id)
super().__init__(id=id, **params)
class Config:
extra = "allow"
model_config = ConfigDict(extra="allow")
def __contains__(self, key):
# Define custom behavior for the 'in' operator
@ -155,6 +149,8 @@ class GenericLiteLLMParams(BaseModel):
input_cost_per_second: Optional[float] = None
output_cost_per_second: Optional[float] = None
model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
def __init__(
self,
custom_llm_provider: Optional[str] = None,
@ -184,7 +180,7 @@ class GenericLiteLLMParams(BaseModel):
output_cost_per_token: Optional[float] = None,
input_cost_per_second: Optional[float] = None,
output_cost_per_second: Optional[float] = None,
**params
**params,
):
args = locals()
args.pop("max_retries", None)
@ -195,10 +191,6 @@ class GenericLiteLLMParams(BaseModel):
max_retries = int(max_retries) # cast to int
super().__init__(max_retries=max_retries, **args, **params)
class Config:
extra = "allow"
arbitrary_types_allowed = True
def __contains__(self, key):
# Define custom behavior for the 'in' operator
return hasattr(self, key)
@ -222,6 +214,7 @@ class LiteLLM_Params(GenericLiteLLMParams):
"""
model: str
model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
def __init__(
self,
@ -245,7 +238,7 @@ class LiteLLM_Params(GenericLiteLLMParams):
aws_access_key_id: Optional[str] = None,
aws_secret_access_key: Optional[str] = None,
aws_region_name: Optional[str] = None,
**params
**params,
):
args = locals()
args.pop("max_retries", None)
@ -256,10 +249,6 @@ class LiteLLM_Params(GenericLiteLLMParams):
max_retries = int(max_retries) # cast to int
super().__init__(max_retries=max_retries, **args, **params)
class Config:
extra = "allow"
arbitrary_types_allowed = True
def __contains__(self, key):
# Define custom behavior for the 'in' operator
return hasattr(self, key)
@ -288,8 +277,7 @@ class updateDeployment(BaseModel):
litellm_params: Optional[updateLiteLLMParams] = None
model_info: Optional[ModelInfo] = None
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class LiteLLMParamsTypedDict(TypedDict, total=False):
@ -338,12 +326,14 @@ class Deployment(BaseModel):
litellm_params: LiteLLM_Params
model_info: ModelInfo
model_config = ConfigDict(extra="allow", protected_namespaces=())
def __init__(
self,
model_name: str,
litellm_params: LiteLLM_Params,
model_info: Optional[Union[ModelInfo, dict]] = None,
**params
**params,
):
if model_info is None:
model_info = ModelInfo()
@ -353,7 +343,7 @@ class Deployment(BaseModel):
model_info=model_info,
model_name=model_name,
litellm_params=litellm_params,
**params
**params,
)
def to_json(self, **kwargs):
@ -363,10 +353,6 @@ class Deployment(BaseModel):
# if using pydantic v1
return self.dict(**kwargs)
class Config:
extra = "allow"
protected_namespaces = ()
def __contains__(self, key):
# Define custom behavior for the 'in' operator
return hasattr(self, key)

View file

@ -18,7 +18,7 @@ from functools import wraps, lru_cache
import datetime, time
import tiktoken
import uuid
from pydantic import BaseModel
from pydantic import BaseModel, ConfigDict
import aiohttp
import textwrap
import logging
@ -337,9 +337,7 @@ class HiddenParams(OpenAIObject):
model_id: Optional[str] = None # used in Router for individual deployments
api_base: Optional[str] = None # returns api base used for making completion call
class Config:
extra = "allow"
protected_namespaces = ()
model_config = ConfigDict(extra="allow", protected_namespaces=())
def get(self, key, default=None):
# Custom .get() method to access attributes with a default value if the attribute doesn't exist
@ -1136,6 +1134,8 @@ class CallTypes(Enum):
amoderation = "amoderation"
atranscription = "atranscription"
transcription = "transcription"
aspeech = "aspeech"
speech = "speech"
# Logging function -> log the exact model details + what's being sent | Non-BlockingP
@ -2027,6 +2027,7 @@ class Logging:
response_obj=result,
start_time=start_time,
end_time=end_time,
user_id=kwargs.get("user", None),
print_verbose=print_verbose,
)
if callback == "s3":
@ -2598,6 +2599,17 @@ class Logging:
level="ERROR",
kwargs=self.model_call_details,
)
if callback == "traceloop":
traceloopLogger.log_event(
start_time=start_time,
end_time=end_time,
response_obj=None,
user_id=kwargs.get("user", None),
print_verbose=print_verbose,
status_message=str(exception),
level="ERROR",
kwargs=self.model_call_details,
)
if callback == "prometheus":
global prometheusLogger
verbose_logger.debug("reaches prometheus for success logging!")
@ -2993,6 +3005,10 @@ def function_setup(
):
_file_name: BinaryIO = args[1] if len(args) > 1 else kwargs["file"]
messages = "audio_file"
elif (
call_type == CallTypes.aspeech.value or call_type == CallTypes.speech.value
):
messages = kwargs.get("input", "speech")
stream = True if "stream" in kwargs and kwargs["stream"] == True else False
logging_obj = Logging(
model=model,
@ -3334,6 +3350,8 @@ def client(original_function):
return result
elif "atranscription" in kwargs and kwargs["atranscription"] == True:
return result
elif "aspeech" in kwargs and kwargs["aspeech"] == True:
return result
### POST-CALL RULES ###
post_call_processing(original_response=result, model=model or None)
@ -5740,6 +5758,8 @@ def get_optional_params(
optional_params["stream"] = stream
if temperature is not None:
optional_params["temperature"] = temperature
if seed is not None:
optional_params["seed"] = seed
if top_p is not None:
optional_params["top_p"] = top_p
if frequency_penalty is not None:
@ -6392,6 +6412,8 @@ def get_supported_openai_params(
return ["stream", "temperature", "max_tokens"]
elif model.startswith("mistral"):
return ["max_tokens", "temperature", "stop", "top_p", "stream"]
elif custom_llm_provider == "ollama":
return litellm.OllamaConfig().get_supported_openai_params()
elif custom_llm_provider == "ollama_chat":
return litellm.OllamaChatConfig().get_supported_openai_params()
elif custom_llm_provider == "anthropic":
@ -6561,16 +6583,6 @@ def get_supported_openai_params(
]
elif custom_llm_provider == "cloudflare":
return ["max_tokens", "stream"]
elif custom_llm_provider == "ollama":
return [
"max_tokens",
"stream",
"top_p",
"temperature",
"frequency_penalty",
"stop",
"response_format",
]
elif custom_llm_provider == "nlp_cloud":
return [
"max_tokens",

View file

@ -1265,8 +1265,8 @@
"max_tokens": 4096,
"max_input_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.0000075,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000075,
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.38.11"
version = "1.39.5"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -79,8 +79,10 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.38.11"
version = "1.39.5"
version_files = [
"pyproject.toml:^version"
]
[tool.mypy]
plugins = "pydantic.mypy"

View file

@ -12,6 +12,7 @@ sys.path.insert(
0, os.path.abspath("../")
) # Adds the parent directory to the system path
import litellm
from litellm.proxy._types import LitellmUserRoles
async def generate_team(
@ -731,7 +732,9 @@ async def test_key_delete_ui():
# generate a admin UI key
team = await generate_team(session=session)
admin_ui_key = await generate_user(session=session, user_role="proxy_admin")
admin_ui_key = await generate_user(
session=session, user_role=LitellmUserRoles.PROXY_ADMIN.value
)
print(
"trying to delete key=",
key,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[418],{33786:function(e,n,u){Promise.resolve().then(u.bind(u,87494))},87494:function(e,n,u){"use strict";u.r(n),u.d(n,{default:function(){return f}});var t=u(3827),s=u(64090),r=u(47907),c=u(41134);function f(){let e=(0,r.useSearchParams)().get("key"),[n,u]=(0,s.useState)(null);return(0,s.useEffect)(()=>{e&&u(e)},[e]),(0,t.jsx)(c.Z,{accessToken:n,publicPage:!0,premiumUser:!1})}}},function(e){e.O(0,[359,134,971,69,744],function(){return e(e.s=33786)}),_N_E=e.O()}]);

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/5d93d4a9fa59d72f.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/33354d8285fe572e.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[39712,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"608\",\"static/chunks/608-d128caa3cfe973c1.js\",\"931\",\"static/chunks/app/page-76d278f96a0e9768.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"D_ZUmMtLMPSa4aQQUJtKt\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-766a329236c9a3f0.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-766a329236c9a3f0.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/33354d8285fe572e.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[45014,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"359\",\"static/chunks/359-f105a7fb61fe8110.js\",\"440\",\"static/chunks/440-b9a05f116e1a696d.js\",\"134\",\"static/chunks/134-4a7b43f992182f2c.js\",\"931\",\"static/chunks/app/page-f610596e5fb3cce4.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/33354d8285fe572e.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"PcGFjo5-03lHREJ3E0k6y\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px 
solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[39712,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","608","static/chunks/608-d128caa3cfe973c1.js","931","static/chunks/app/page-76d278f96a0e9768.js"],""]
3:I[45014,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","359","static/chunks/359-f105a7fb61fe8110.js","440","static/chunks/440-b9a05f116e1a696d.js","134","static/chunks/134-4a7b43f992182f2c.js","931","static/chunks/app/page-f610596e5fb3cce4.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["D_ZUmMtLMPSa4aQQUJtKt",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5d93d4a9fa59d72f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["PcGFjo5-03lHREJ3E0k6y",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/33354d8285fe572e.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,7 @@
2:I[77831,[],""]
3:I[87494,["359","static/chunks/359-f105a7fb61fe8110.js","134","static/chunks/134-4a7b43f992182f2c.js","418","static/chunks/app/model_hub/page-aa3c10cf9bb31255.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["PcGFjo5-03lHREJ3E0k6y",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/33354d8285fe572e.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -18,7 +18,7 @@ import Usage from "../components/usage";
import { jwtDecode } from "jwt-decode";
import { Typography } from "antd";
export function formatUserRole(userRole: string) {
function formatUserRole(userRole: string) {
if (!userRole) {
return "Undefined Role";
}

View file

@ -58,6 +58,7 @@ import {
User,
setCallbacksCall,
invitationCreateCall,
getPossibleUserRoles,
} from "./networking";
const AdminPanel: React.FC<AdminPanelProps> = ({
@ -83,6 +84,9 @@ const AdminPanel: React.FC<AdminPanelProps> = ({
useState(false);
const router = useRouter();
const [baseUrl, setBaseUrl] = useState("");
const [isInstructionsModalVisible, setIsInstructionsModalVisible] = useState(false);
const [possibleUIRoles, setPossibleUIRoles] = useState<null | Record<string, Record<string, string>>>(null);
let nonSssoUrl;
try {
@ -163,6 +167,9 @@ const AdminPanel: React.FC<AdminPanelProps> = ({
console.log(`proxy admins: ${proxyAdmins}`);
console.log(`combinedList: ${combinedList}`);
setAdmins(combinedList);
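      // load the role -> UI label mapping used when rendering member roles below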
const availableUserRoles = await getPossibleUserRoles(accessToken);
setPossibleUIRoles(availableUserRoles);
}
};
@ -435,7 +442,7 @@ const AdminPanel: React.FC<AdminPanelProps> = ({
? member["user_id"]
: null}
</TableCell>
<TableCell>{member["user_role"]}</TableCell>
<TableCell> {possibleUIRoles?.[member?.user_role]?.ui_label || "-"}</TableCell>
<TableCell>
<Icon
icon={PencilAltIcon}

View file

@ -149,6 +149,12 @@ const ChatUI: React.FC<ChatUIProps> = ({
});
};
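  // submit the chat input when the user presses Enter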
const handleKeyDown = (event: React.KeyboardEvent<HTMLInputElement>) => {
if (event.key === 'Enter') {
handleSendMessage();
}
};
const handleSendMessage = async () => {
if (inputMessage.trim() === "") return;
@ -260,6 +266,7 @@ const ChatUI: React.FC<ChatUIProps> = ({
type="text"
value={inputMessage}
onChange={(e) => setInputMessage(e.target.value)}
              onKeyDown={handleKeyDown} // send the message when Enter is pressed
placeholder="Type your message..."
/>
<Button

View file

@ -0,0 +1,138 @@
import { useEffect, useState } from 'react';
import {
Dialog,
DialogPanel,
TextInput,
Button,
Select,
SelectItem,
Text,
Title,
Subtitle,
} from '@tremor/react';
import {
Button as Button2,
Modal,
Form,
Input,
Select as Select2,
InputNumber,
message,
} from "antd";
interface EditUserModalProps {
visible: boolean;
possibleUIRoles: null | Record<string, Record<string, string>>;
onCancel: () => void;
user: any;
onSubmit: (data: any) => void;
}
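// Modal form for editing an existing user's email, role, spend, and max budget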
const EditUserModal: React.FC<EditUserModalProps> = ({ visible, possibleUIRoles, onCancel, user, onSubmit }) => {
const [editedUser, setEditedUser] = useState(user);
const [form] = Form.useForm();
useEffect(() => {
form.resetFields();
}, [user]);
const handleCancel = async () => {
form.resetFields();
onCancel();
};
const handleEditSubmit = async (formValues: Record<string, any>) => {
// Call API to update team with teamId and values
onSubmit(formValues);
form.resetFields();
onCancel();
};
if (!user) {
return null;
}
return (
<Modal
visible={visible}
onCancel={handleCancel}
footer={null}
title={"Edit User " + user.user_id}
width={1000}
>
<Form
form={form}
onFinish={handleEditSubmit}
initialValues={user} // Pass initial values here
labelCol={{ span: 8 }}
wrapperCol={{ span: 16 }}
labelAlign="left"
>
<>
<Form.Item
className="mt-8"
label="User Email"
tooltip="Email of the User"
name="user_email">
<TextInput />
</Form.Item>
<Form.Item
label="user_id"
name="user_id"
hidden={true}
>
<TextInput />
</Form.Item>
<Form.Item
label="User Role"
name="user_role"
>
<Select2>
{possibleUIRoles &&
Object.entries(possibleUIRoles).map(([role, { ui_label, description }]) => (
<SelectItem key={role} value={role} title={ui_label}>
<div className='flex'>
{ui_label} <p className="ml-2" style={{ color: "gray", fontSize: "12px" }}>{description}</p>
</div>
</SelectItem>
))}
</Select2>
</Form.Item>
<Form.Item
label="Spend (USD)"
name="spend"
tooltip="(float) - Spend of all LLM calls completed by this user"
>
<InputNumber min={0} step={1} />
</Form.Item>
<Form.Item
label="User Budget (USD)"
name="max_budget"
tooltip="(float) - Maximum budget of this user"
>
<InputNumber min={0} step={1} />
</Form.Item>
<div style={{ textAlign: "right", marginTop: "10px" }}>
<Button2 htmlType="submit">Save</Button2>
</div>
</>
</Form>
</Modal>
);
};
export default EditUserModal;

View file

@ -79,7 +79,7 @@ const Sidebar: React.FC<SidebarProps> = ({
{userRole == "Admin" ? (
<Menu.Item key="5" onClick={() => setPage("users")}>
<Text>Users</Text>
<Text>Internal Users</Text>
</Menu.Item>
) : null}
@ -91,7 +91,7 @@ const Sidebar: React.FC<SidebarProps> = ({
{userRole == "Admin" ? (
<Menu.Item key="9" onClick={() => setPage("budgets")}>
<Text>Rate Limits</Text>
<Text>Budgets</Text>
</Menu.Item>
) : null}

View file

@ -49,6 +49,8 @@ import {
getCallbacksCall,
setCallbacksCall,
modelSettingsCall,
adminGlobalActivityExceptions,
adminGlobalActivityExceptionsPerDeployment,
} from "./networking";
import { BarChart, AreaChart } from "@tremor/react";
import {
@ -109,6 +111,13 @@ interface RetryPolicyObject {
[key: string]: { [retryPolicyKey: string]: number } | undefined;
}
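// aggregated rate-limit (429) exception counts returned by the /global/activity/exceptions endpoint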
interface GlobalExceptionActivityData {
sum_num_rate_limit_exceptions: number;
daily_data: { date: string; num_rate_limit_exceptions: number; }[];
}
//["OpenAI", "Azure OpenAI", "Anthropic", "Gemini (Google AI Studio)", "Amazon Bedrock", "OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)"]
interface ProviderFields {
@ -301,6 +310,9 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
useState<RetryPolicyObject | null>(null);
const [defaultRetry, setDefaultRetry] = useState<number>(0);
const [globalExceptionData, setGlobalExceptionData] = useState<GlobalExceptionActivityData>({} as GlobalExceptionActivityData);
const [globalExceptionPerDeployment, setGlobalExceptionPerDeployment] = useState<any[]>([]);
function formatCreatedAt(createdAt: string | null) {
if (createdAt) {
const date = new Date(createdAt);
@ -643,6 +655,29 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
dateValue.to?.toISOString()
);
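      // pull rate-limit exception activity for the selected model group and date range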
const dailyExceptions = await adminGlobalActivityExceptions(
accessToken,
dateValue.from?.toISOString().split('T')[0],
dateValue.to?.toISOString().split('T')[0],
_initial_model_group,
);
setGlobalExceptionData(dailyExceptions);
      const dailyExceptionsPerDeployment = await adminGlobalActivityExceptionsPerDeployment(
        accessToken,
        dateValue.from?.toISOString().split('T')[0],
        dateValue.to?.toISOString().split('T')[0],
        _initial_model_group,
      );
      setGlobalExceptionPerDeployment(dailyExceptionsPerDeployment);
      console.log("dailyExceptions:", dailyExceptions);
      console.log("dailyExceptionsPerDeployment:", dailyExceptionsPerDeployment);
console.log("slowResponses:", slowResponses);
setSlowResponsesData(slowResponses);
@ -905,6 +940,30 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
console.log("slowResponses:", slowResponses);
setSlowResponsesData(slowResponses);
if (modelGroup) {
const dailyExceptions = await adminGlobalActivityExceptions(
accessToken,
startTime?.toISOString().split('T')[0],
endTime?.toISOString().split('T')[0],
modelGroup,
);
setGlobalExceptionData(dailyExceptions);
        const dailyExceptionsPerDeployment = await adminGlobalActivityExceptionsPerDeployment(
          accessToken,
          startTime?.toISOString().split('T')[0],
          endTime?.toISOString().split('T')[0],
          modelGroup,
        );
        setGlobalExceptionPerDeployment(dailyExceptionsPerDeployment);
}
} catch (error) {
console.error("Failed to fetch model metrics", error);
}
@ -1475,7 +1534,8 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
)}
{selectedProvider != Providers.Bedrock &&
selectedProvider != Providers.Vertex_AI &&
dynamicProviderForm === undefined && (
(dynamicProviderForm === undefined ||
dynamicProviderForm.fields.length == 0) && (
<Form.Item
rules={[{ required: true, message: "Required" }]}
label="API Key"
@ -1777,18 +1837,110 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
</Card>
</Col>
</Grid>
<Card className="mt-4">
<Title>Exceptions per Model</Title>
<BarChart
className="h-72"
data={modelExceptions}
index="model"
categories={allExceptions}
stack={true}
colors={["indigo-300", "rose-200", "#ffcc33"]}
yAxisWidth={30}
/>
</Card>
<Grid numItems={1} className="gap-2 w-full mt-2">
<Card>
<Title>All Up Rate Limit Errors (429) for {selectedModelGroup}</Title>
<Grid numItems={1}>
<Col>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Num Rate Limit Errors { (globalExceptionData.sum_num_rate_limit_exceptions)}</Subtitle>
<BarChart
className="h-40"
data={globalExceptionData.daily_data}
index="date"
colors={['rose']}
categories={['num_rate_limit_exceptions']}
onValueChange={(v) => console.log(v)}
/>
</Col>
<Col>
{/* <BarChart
className="h-40"
data={modelExceptions}
index="model"
categories={allExceptions}
stack={true}
yAxisWidth={30}
/> */}
</Col>
</Grid>
</Card>
{
premiumUser ? (
<>
{globalExceptionPerDeployment.map((globalActivity, index) => (
<Card key={index}>
<Title>{globalActivity.api_base ? globalActivity.api_base : "Unknown API Base"}</Title>
<Grid numItems={1}>
<Col>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Num Rate Limit Errors (429) {(globalActivity.sum_num_rate_limit_exceptions)}</Subtitle>
<BarChart
className="h-40"
data={globalActivity.daily_data}
index="date"
colors={['rose']}
categories={['num_rate_limit_exceptions']}
onValueChange={(v) => console.log(v)}
/>
</Col>
</Grid>
</Card>
))}
</>
) :
<>
{globalExceptionPerDeployment && globalExceptionPerDeployment.length > 0 &&
globalExceptionPerDeployment.slice(0, 1).map((globalActivity, index) => (
<Card key={index}>
<Title> Rate Limit Errors by Deployment</Title>
<p className="mb-2 text-gray-500 italic text-[12px]">Upgrade to see exceptions for all deployments</p>
<Button variant="primary" className="mb-2">
<a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank">
Get Free Trial
</a>
</Button>
<Card>
<Title>{globalActivity.api_base}</Title>
<Grid numItems={1}>
<Col>
<Subtitle
style={{
fontSize: "15px",
fontWeight: "normal",
color: "#535452",
}}
>
Num Rate Limit Errors {(globalActivity.sum_num_rate_limit_exceptions)}
</Subtitle>
<BarChart
className="h-40"
data={globalActivity.daily_data}
index="date"
colors={['rose']}
categories={['num_rate_limit_exceptions']}
onValueChange={(v) => console.log(v)}
/>
</Col>
</Grid>
</Card>
</Card>
))}
</>
}
</Grid>
</TabPanel>
<TabPanel>
<div className="flex items-center">

View file

@ -39,7 +39,9 @@ const Navbar: React.FC<NavbarProps> = ({
// const userColors = require('./ui_colors.json') || {};
const isLocal = process.env.NODE_ENV === "development";
const proxyBaseUrl = isLocal ? "http://localhost:4000" : null;
const imageUrl = isLocal ? "http://localhost:4000/get_image" : "/get_image";
const logoutUrl = proxyBaseUrl ? `${proxyBaseUrl}` : `/`;
const items: MenuProps["items"] = [
{
@ -52,6 +54,14 @@ const Navbar: React.FC<NavbarProps> = ({
</>
),
},
{
key: "2",
label: (
<Link href={logoutUrl}>
<p>Logout</p>
</Link>
),
}
];
return (

View file

@ -1270,6 +1270,100 @@ export const adminGlobalActivityPerModel = async (
}
};
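// fetch daily rate-limit (429) exception counts for a model group from /global/activity/exceptions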
export const adminGlobalActivityExceptions = async (
accessToken: String,
startTime: String | undefined,
endTime: String | undefined,
modelGroup: String,
) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/global/activity/exceptions`
: `/global/activity/exceptions`;
if (startTime && endTime) {
url += `?start_date=${startTime}&end_date=${endTime}`;
}
    if (modelGroup) {
      // use "?" when no date-range params were appended above
      url += `${url.includes("?") ? "&" : "?"}model_group=${modelGroup}`;
    }
const requestOptions: {
method: string;
headers: {
Authorization: string;
};
} = {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
},
};
const response = await fetch(url, requestOptions);
if (!response.ok) {
      const errorData = await response.text();
      throw new Error(errorData || "Network response was not ok");
}
const data = await response.json();
console.log(data);
return data;
} catch (error) {
console.error("Failed to fetch spend data:", error);
throw error;
}
};
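// fetch per-deployment rate-limit (429) exception counts from /global/activity/exceptions/deployment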
export const adminGlobalActivityExceptionsPerDeployment = async (
accessToken: String,
startTime: String | undefined,
endTime: String | undefined,
modelGroup: String,
) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/global/activity/exceptions/deployment`
: `/global/activity/exceptions/deployment`;
if (startTime && endTime) {
url += `?start_date=${startTime}&end_date=${endTime}`;
}
    if (modelGroup) {
      // use "?" when no date-range params were appended above
      url += `${url.includes("?") ? "&" : "?"}model_group=${modelGroup}`;
    }
const requestOptions: {
method: string;
headers: {
Authorization: string;
};
} = {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
},
};
const response = await fetch(url, requestOptions);
if (!response.ok) {
      const errorData = await response.text();
      throw new Error(errorData || "Network response was not ok");
}
const data = await response.json();
console.log(data);
return data;
} catch (error) {
console.error("Failed to fetch spend data:", error);
throw error;
}
};
export const adminTopModelsCall = async (accessToken: String) => {
try {
let url = proxyBaseUrl
@ -1465,6 +1559,34 @@ export const userGetAllUsersCall = async (
}
};
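// fetch the role -> { ui_label, description } mapping from /user/available_roles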
export const getPossibleUserRoles = async (
accessToken: String,
) => {
try {
const url = proxyBaseUrl
? `${proxyBaseUrl}/user/available_roles`
: `/user/available_roles`;
const response = await fetch(url, {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
      const errorData = await response.text();
      throw new Error(errorData || "Network response was not ok");
}
const data = await response.json();
console.log("response from user/available_role", data);
return data;
    // data maps each role to its { ui_label, description } metadata for the UI
} catch (error) {
throw error;
}
};
export const teamCreateCall = async (
accessToken: string,
formValues: Record<string, any> // Assuming formValues is an object

View file

@ -188,6 +188,43 @@ const Settings: React.FC<SettingsPageProps> = ({
console.log("Selected values:", values);
};
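  // read the email alert environment variable inputs from the form and persist them via setCallbacksCall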
const handleSaveEmailSettings = () => {
if (!accessToken) {
return;
}
let updatedVariables: Record<string, string> = {};
alerts
.filter((alert) => alert.name === "email")
.forEach((alert) => {
Object.entries(alert.variables ?? {}).forEach(([key, value]) => {
const inputElement = document.querySelector(`input[name="${key}"]`) as HTMLInputElement;
if (inputElement && inputElement.value) {
updatedVariables[key] = inputElement?.value;
}
});
});
console.log("updatedVariables", updatedVariables);
    // only non-empty input values were collected above, so no further filtering is needed
const payload = {
general_settings: {
alerting: ["email"],
},
environment_variables: updatedVariables,
};
    try {
      setCallbacksCall(accessToken, payload);
      message.success("Email settings updated successfully");
    } catch (error) {
      message.error("Failed to update email settings: " + error, 20);
    }
  };
const handleSaveAlerts = () => {
if (!accessToken) {
return;
@ -369,7 +406,8 @@ const Settings: React.FC<SettingsPageProps> = ({
<TabList variant="line" defaultValue="1">
<Tab value="1">Logging Callbacks</Tab>
<Tab value="2">Alerting Types</Tab>
<Tab value="2">Alerting Settings</Tab>
<Tab value="3">Alerting Settings</Tab>
<Tab value="4">Email Alerts</Tab>
</TabList>
<TabPanels>
<TabPanel>
@ -526,6 +564,142 @@ const Settings: React.FC<SettingsPageProps> = ({
premiumUser={premiumUser}
/>
</TabPanel>
<TabPanel>
<Card>
<Title>Email Settings</Title>
<Text>
<a href="https://docs.litellm.ai/docs/proxy/email" target="_blank" style={{ color: "blue" }}> LiteLLM Docs: email alerts</a> <br/>
</Text>
<div className="flex w-full">
{alerts
.filter((alert) => alert.name === "email")
.map((alert, index) => (
<TableCell key={index}>
<ul>
<Grid numItems={2}>
{Object.entries(alert.variables ?? {}).map(([key, value]) => (
<li key={key} className="mx-2 my-2">
                        { premiumUser != true && (key === "EMAIL_LOGO_URL" || key === "EMAIL_SUPPORT_CONTACT") ? (
<div>
<a
href="https://forms.gle/W3U4PZpJGFHWtHyA9"
target="_blank"
>
<Text className="mt-2">
{" "}
{key}
</Text>
</a>
<TextInput
name={key}
defaultValue={value as string}
type="password"
disabled={true}
style={{ width: "400px" }}
/>
</div>
) : (
<div>
<Text className="mt-2">{key}</Text>
<TextInput
name={key}
defaultValue={value as string}
type="password"
style={{ width: "400px" }}
/>
</div>
)}
{/* Added descriptions for input fields */}
<p style={{ fontSize: "small", fontStyle: "italic" }}>
{key === "SMTP_HOST" && (
<div style={{ color: "gray" }}>
Enter the SMTP host address, e.g. `smtp.resend.com`
<span style={{ color: "red" }}> Required * </span>
</div>
)}
{key === "SMTP_PORT" && (
<div style={{ color: "gray" }}>
Enter the SMTP port number, e.g. `587`
<span style={{ color: "red" }}> Required * </span>
</div>
)}
{key === "SMTP_USERNAME" && (
<div style={{ color: "gray" }}>
Enter the SMTP username, e.g. `username`
<span style={{ color: "red" }}> Required * </span>
</div>
)}
{key === "SMTP_PASSWORD" && (
<span style={{ color: "red" }}> Required * </span>
)}
{key === "SMTP_SENDER_EMAIL" && (
<div style={{ color: "gray" }}>
Enter the sender email address, e.g. `sender@berri.ai`
<span style={{ color: "red" }}> Required * </span>
</div>
)}
{key === "TEST_EMAIL_ADDRESS" && (
<div style={{ color: "gray" }}>
                            Email address to send the `Test Email Alert` to, e.g. `info@berri.ai`
<span style={{ color: "red" }}> Required * </span>
</div>
)
}
{key === "EMAIL_LOGO_URL" && (
<div style={{ color: "gray" }}>
(Optional) Customize the Logo that appears in the email, pass a url to your logo
</div>
)
}
{key === "EMAIL_SUPPORT_CONTACT" && (
<div style={{ color: "gray" }}>
(Optional) Customize the support email address that appears in the email. Default is support@berri.ai
</div>
)
}
</p>
</li>
))}
</Grid>
</ul>
</TableCell>
))}
</div>
<Button
className="mt-2"
onClick={() => handleSaveEmailSettings()}
>
Save Changes
</Button>
<Button
onClick={() =>
serviceHealthCheck(accessToken, "email")
}
className="mx-2"
>
Test Email Alerts
</Button>
</Card>
</TabPanel>
</TabPanels>
</TabGroup>
</Grid>

View file

@ -162,6 +162,17 @@ const UsagePage: React.FC<UsagePageProps> = ({
console.log("keys in usage", keys);
console.log("premium user in usage", premiumUser);
function valueFormatterNumbers(number: number) {
const formatter = new Intl.NumberFormat('en-US', {
maximumFractionDigits: 0,
notation: 'compact',
compactDisplay: 'short',
});
return formatter.format(number);
}
const updateEndUserData = async (startTime: Date | undefined, endTime: Date | undefined, uiSelectedKey: string | null) => {
if (!startTime || !endTime || !accessToken) {
return;
@ -482,10 +493,11 @@ const UsagePage: React.FC<UsagePageProps> = ({
<Title>All Up</Title>
<Grid numItems={2}>
<Col>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>API Requests {globalActivity.sum_api_requests}</Subtitle>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>API Requests { valueFormatterNumbers(globalActivity.sum_api_requests)}</Subtitle>
<AreaChart
className="h-40"
data={globalActivity.daily_data}
valueFormatter={valueFormatterNumbers}
index="date"
colors={['cyan']}
categories={['api_requests']}
@ -494,10 +506,11 @@ const UsagePage: React.FC<UsagePageProps> = ({
</Col>
<Col>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Tokens {globalActivity.sum_total_tokens}</Subtitle>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Tokens { valueFormatterNumbers(globalActivity.sum_total_tokens)}</Subtitle>
<BarChart
className="h-40"
data={globalActivity.daily_data}
valueFormatter={valueFormatterNumbers}
index="date"
colors={['cyan']}
categories={['total_tokens']}
@ -517,24 +530,26 @@ const UsagePage: React.FC<UsagePageProps> = ({
<Title>{globalActivity.model}</Title>
<Grid numItems={2}>
<Col>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>API Requests {globalActivity.sum_api_requests}</Subtitle>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>API Requests {valueFormatterNumbers(globalActivity.sum_api_requests)}</Subtitle>
<AreaChart
className="h-40"
data={globalActivity.daily_data}
index="date"
colors={['cyan']}
categories={['api_requests']}
valueFormatter={valueFormatterNumbers}
onValueChange={(v) => console.log(v)}
/>
</Col>
<Col>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Tokens {globalActivity.sum_total_tokens}</Subtitle>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Tokens {valueFormatterNumbers(globalActivity.sum_total_tokens)}</Subtitle>
<BarChart
className="h-40"
data={globalActivity.daily_data}
index="date"
colors={['cyan']}
categories={['total_tokens']}
valueFormatter={valueFormatterNumbers}
onValueChange={(v) => console.log(v)}
/>
</Col>
@ -565,7 +580,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
color: "#535452",
}}
>
API Requests {globalActivity.sum_api_requests}
API Requests {valueFormatterNumbers(globalActivity.sum_api_requests)}
</Subtitle>
<AreaChart
className="h-40"
@ -573,6 +588,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
index="date"
colors={['cyan']}
categories={['api_requests']}
valueFormatter={valueFormatterNumbers}
onValueChange={(v) => console.log(v)}
/>
</Col>
@ -584,13 +600,14 @@ const UsagePage: React.FC<UsagePageProps> = ({
color: "#535452",
}}
>
Tokens {globalActivity.sum_total_tokens}
Tokens {valueFormatterNumbers(globalActivity.sum_total_tokens)}
</Subtitle>
<BarChart
className="h-40"
data={globalActivity.daily_data}
index="date"
colors={['cyan']}
valueFormatter={valueFormatterNumbers}
categories={['total_tokens']}
onValueChange={(v) => console.log(v)}
/>

View file

@ -24,12 +24,22 @@ import {
Icon,
TextInput,
} from "@tremor/react";
import { userInfoCall } from "./networking";
import {
message,
} from "antd";
import { userInfoCall, userUpdateUserCall, getPossibleUserRoles } from "./networking";
import { Badge, BadgeDelta, Button } from "@tremor/react";
import RequestAccess from "./request_model_access";
import CreateUser from "./create_user_button";
import EditUserModal from "./edit_user";
import Paragraph from "antd/es/skeleton/Paragraph";
import InformationCircleIcon from "@heroicons/react/outline/InformationCircleIcon";
import {
PencilAltIcon,
InformationCircleIcon,
TrashIcon,
} from "@heroicons/react/outline";
interface ViewUserDashboardProps {
accessToken: string | null;
@ -55,8 +65,40 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
const [currentPage, setCurrentPage] = useState(0);
const [openDialogId, setOpenDialogId] = React.useState<null | number>(null);
const [selectedItem, setSelectedItem] = useState<null | any>(null);
const [editModalVisible, setEditModalVisible] = useState(false);
const [selectedUser, setSelectedUser] = useState(null);
const [possibleUIRoles, setPossibleUIRoles] = useState<Record<string, Record<string, string>>>({});
const defaultPageSize = 25;
const handleEditCancel = async () => {
setSelectedUser(null);
setEditModalVisible(false);
};
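  // persist the edited user via the API, then sync the change into the local table state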
const handleEditSubmit = async (editedUser: any) => {
console.log("inside handleEditSubmit:", editedUser);
if (!accessToken || !token || !userRole || !userID) {
return;
}
try {
await userUpdateUserCall(accessToken, editedUser, null);
message.success(`User ${editedUser.user_id} updated successfully`);
} catch (error) {
console.error("There was an error updating the user", error);
}
if (userData) {
const updatedUserData = userData.map((user) =>
user.user_id === editedUser.user_id ? editedUser : user
);
setUserData(updatedUserData);
}
setSelectedUser(null);
setEditModalVisible(false);
// Close the modal
};
useEffect(() => {
if (!accessToken || !token || !userRole || !userID) {
return;
@ -74,11 +116,16 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
);
console.log("user data response:", userDataResponse);
setUserData(userDataResponse);
const availableUserRoles = await getPossibleUserRoles(accessToken);
setPossibleUIRoles(availableUserRoles);
} catch (error) {
console.error("There was an error fetching the model data", error);
}
};
if (accessToken && token && userRole && userID) {
fetchData();
}
@ -126,14 +173,10 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
return (
<div style={{ width: "100%" }}>
<Grid className="gap-2 p-2 h-[80vh] w-full mt-8">
<Grid className="gap-2 p-2 h-[90vh] w-full mt-8">
<CreateUser userID={userID} accessToken={accessToken} teams={teams} />
<Card className="w-full mx-auto flex-auto overflow-y-auto max-h-[80vh] mb-4">
<Card className="w-full mx-auto flex-auto overflow-y-auto max-h-[90vh] mb-4">
<div className="mb-4 mt-1">
<Text>
These are Users on LiteLLM that created API Keys. Automatically
tracked by LiteLLM
</Text>
</div>
<TabGroup>
<TabPanels>
@ -143,25 +186,23 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
<TableRow>
<TableHeaderCell>User ID</TableHeaderCell>
<TableHeaderCell>User Email</TableHeaderCell>
<TableHeaderCell>User Models</TableHeaderCell>
<TableHeaderCell>Role</TableHeaderCell>
<TableHeaderCell>User Spend ($ USD)</TableHeaderCell>
<TableHeaderCell>User Max Budget ($ USD)</TableHeaderCell>
<TableHeaderCell>User API Key Aliases</TableHeaderCell>
<TableHeaderCell>API Keys</TableHeaderCell>
<TableHeaderCell></TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
{userData.map((user: any) => (
<TableRow key={user.user_id}>
<TableCell>{user.user_id}</TableCell>
<TableCell>{user.user_email}</TableCell>
<TableCell>{user.user_id || "-"}</TableCell>
<TableCell>{user.user_email || "-"}</TableCell>
<TableCell>
{user.models && user.models.length > 0
? user.models
: "All Models"}
{possibleUIRoles?.[user?.user_role]?.ui_label || "-"}
</TableCell>
<TableCell>
{user.spend ? user.spend?.toFixed(2) : 0}
{user.spend ? user.spend?.toFixed(2) : "-"}
</TableCell>
<TableCell>
{user.max_budget ? user.max_budget : "Unlimited"}
@ -173,9 +214,13 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
(key: any) => key !== null
).length > 0 ? (
<Badge size={"xs"} color={"indigo"}>
{user.key_aliases
.filter((key: any) => key !== null)
.join(", ")}
{
user.key_aliases.filter(
(key: any) => key !== null
).length
}
&nbsp;Keys
</Badge>
) : (
<Badge size={"xs"} color={"gray"}>
@ -188,12 +233,23 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
</Badge>
)}
{/* <Text>{user.key_aliases.filter(key => key !== null).length} Keys</Text> */}
{/* <Icon icon={InformationCircleIcon} onClick= {() => {
</Grid>
</TableCell>
<TableCell>
                        <Icon icon={PencilAltIcon} onClick={() => {
                          setSelectedUser(user)
                          setEditModalVisible(true)
                        }}>Edit User</Icon>
{/*
<Icon icon={TrashIcon} onClick= {() => {
setOpenDialogId(user.user_id)
setSelectedItem(user)
}}>View Keys</Icon> */}
</Grid>
</TableCell>
</TableRow>
))}
</TableBody>
@ -226,30 +282,16 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
</TabPanel>
</TabPanels>
</TabGroup>
<EditUserModal
visible={editModalVisible}
possibleUIRoles={possibleUIRoles}
onCancel={handleEditCancel}
user={selectedUser}
onSubmit={handleEditSubmit}
/>
</Card>
{renderPagination()}
</Grid>
{/* <Dialog
open={openDialogId !== null}
onClose={() => {
setOpenDialogId(null);
}}
>
<DialogPanel>
<div className="grid grid-cols-1 gap-6 sm:grid-cols-2 lg:grid-cols-3">
<Title>Key Aliases</Title>
<Text>
{selectedItem && selectedItem.key_aliases
? selectedItem.key_aliases.filter(key => key !== null).length > 0
? selectedItem.key_aliases.filter(key => key !== null).join(', ')
: 'No Keys'
: "No Keys"}
</Text>
</div>
</DialogPanel>
</Dialog> */}
</div>
);
};