Merge branch 'main' of github.com:lunary-ai/litellm

Vince Loewe 2024-02-28 22:18:43 -08:00
commit ab415d5165
68 changed files with 2676 additions and 1126 deletions

View file

@ -130,6 +130,7 @@ jobs:
pip install "langfuse>=2.0.0"
pip install numpydoc
pip install prisma
pip install fastapi
pip install "httpx==0.24.1"
pip install "gunicorn==21.2.0"
pip install "anyio==3.7.1"

View file

@ -1,18 +1,25 @@
# Function Calling
Function calling is supported by the following models on OpenAI and Azure OpenAI:
- gpt-4
- gpt-4-1106-preview
- gpt-4-0613
- gpt-3.5-turbo
- gpt-3.5-turbo-1106
- gpt-3.5-turbo-0613
- Non-OpenAI LLMs (LiteLLM adds the function call to the prompt for these LLMs)

In addition, parallel function calling is supported on the following models:
- gpt-4-1106-preview
- gpt-3.5-turbo-1106

## Checking if a model supports function calling
Use `litellm.supports_function_calling(model="")` -> returns `True` if the model supports function calling, `False` if not
```python
assert litellm.supports_function_calling(model="gpt-3.5-turbo") == True
assert litellm.supports_function_calling(model="azure/gpt-4-1106-preview") == True
assert litellm.supports_function_calling(model="palm/chat-bison") == False
assert litellm.supports_function_calling(model="ollama/llama2") == False
```
## Checking if a model supports parallel function calling
Use `litellm.supports_parallel_function_calling(model="")` -> returns `True` if the model supports parallel function calling, `False` if not
```python
assert litellm.supports_parallel_function_calling(model="gpt-4-turbo-preview") == True
assert litellm.supports_parallel_function_calling(model="gpt-4") == False
```
## Parallel Function Calling
Parallel function calling is the model's ability to perform multiple function calls together, allowing the effects and results of these calls to be resolved in parallel.
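As a minimal sketch (the `get_current_weather` tool and its schema are illustrative placeholders, not part of LiteLLM):
```python
from litellm import completion

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",  # illustrative tool name
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City name, e.g. San Francisco"}
                },
                "required": ["location"],
            },
        },
    }
]

response = completion(
    model="gpt-3.5-turbo-1106",  # supports parallel function calling (see list above)
    messages=[{"role": "user", "content": "What's the weather in San Francisco and Tokyo?"}],
    tools=tools,
    tool_choice="auto",
)
# a single assistant message may contain multiple tool_calls - one per location
print(response.choices[0].message.tool_calls)
```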

View file

@ -291,7 +291,6 @@ Here's an example of using a bedrock model with LiteLLM
| Anthropic Claude-V2.1 | `completion(model='bedrock/anthropic.claude-v2:1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` |
| Anthropic Claude-V2 | `completion(model='bedrock/anthropic.claude-v2', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` |
| Anthropic Claude-Instant V1 | `completion(model='bedrock/anthropic.claude-instant-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` |
| Amazon Titan Lite | `completion(model='bedrock/amazon.titan-text-lite-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
| Amazon Titan Express | `completion(model='bedrock/amazon.titan-text-express-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
| Cohere Command | `completion(model='bedrock/cohere.command-text-v14', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |

View file

@ -0,0 +1,44 @@
# 💸 GET Daily Spend, Usage Metrics
## Request Format
```shell
curl -X GET "http://0.0.0.0:4000/daily_metrics" -H "Authorization: Bearer sk-1234"
```
## Response Format
```json
{
    "daily_spend": [
        {
            "daily_spend": 7.9261938052047e+16,
            "day": "2024-02-01T00:00:00",
            "spend_per_model": {"azure/gpt-4": 7.9261938052047e+16},
            "spend_per_api_key": {
                "76": 914495704992000.0,
                "12": 905726697912000.0,
                "71": 866312628003000.0,
                "28": 865461799332000.0,
                "13": 859151538396000.0
            }
        },
        {
            "daily_spend": 7.938489251309491e+16,
            "day": "2024-02-02T00:00:00",
            "spend_per_model": {"gpt-3.5": 7.938489251309491e+16},
            "spend_per_api_key": {
                "91": 896805036036000.0,
                "78": 889692646082000.0,
                "49": 885386687861000.0,
                "28": 873869890984000.0,
                "56": 867398637692000.0
            }
        }
    ],
    "total_spend": 200,
    "top_models": {"gpt4": 0.2, "vertexai/gemini-pro": 10},
    "top_api_keys": {"899922": 0.9, "838hcjd999seerr88": 20}
}
```
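The same request from Python, as a sketch (assumes the proxy is running locally with the example master key above, and that the `requests` package is installed):
```python
import requests

response = requests.get(
    "http://0.0.0.0:4000/daily_metrics",
    headers={"Authorization": "Bearer sk-1234"},
)
metrics = response.json()
print(metrics["total_spend"])  # e.g. 200
print(metrics["top_models"])   # e.g. {"gpt4": 0.2, "vertexai/gemini-pro": 10}
```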

View file

@ -186,6 +186,20 @@ If you don't see all your keys this could be due to a cached token. So just re-l
:::
### Restrict UI Access
You can restrict UI access to admins only. This includes you (proxy_admin) and anyone you grant view-only access to (proxy_admin_viewer) for viewing global spend.
**Step 1. Set 'admin_only' access**
```yaml
general_settings:
  ui_access_mode: "admin_only"
```
**Step 2. Invite view-only users**
<Image img={require('../../img/admin_ui_viewer.png')} />
### Custom Branding Admin UI
Use your company's custom branding on the LiteLLM Admin UI.

Binary file not shown (new image: admin_ui_viewer.png, 131 KiB)

View file

@ -40,6 +40,7 @@ const sidebars = {
"proxy/virtual_keys",
"proxy/users",
"proxy/ui",
"proxy/metrics",
"proxy/model_management",
"proxy/health",
"proxy/debugging",

View file

@ -110,3 +110,138 @@ async def view_spend_logs_from_clickhouse(
"log_count": num_rows,
}
return response_data
def _create_clickhouse_material_views(client=None, table_names=[]):
# Create Materialized Views if they don't exist
# Materialized Views send new inserted rows to the aggregate tables
verbose_logger.debug("Clickhouse: Creating Materialized Views")
if "daily_aggregated_spend_per_model_mv" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model_mv")
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_model_mv
TO daily_aggregated_spend_per_model
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend,
model as model
FROM spend_logs
GROUP BY
day, model
"""
)
if "daily_aggregated_spend_per_api_key_mv" not in table_names:
verbose_logger.debug(
"Clickhouse: Creating daily_aggregated_spend_per_api_key_mv"
)
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_api_key_mv
TO daily_aggregated_spend_per_api_key
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend,
api_key as api_key
FROM spend_logs
GROUP BY
day, api_key
"""
)
if "daily_aggregated_spend_per_user_mv" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user_mv")
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_per_user_mv
TO daily_aggregated_spend_per_user
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend,
user as user
FROM spend_logs
GROUP BY
day, user
"""
)
if "daily_aggregated_spend_mv" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_mv")
client.command(
"""
CREATE MATERIALIZED VIEW daily_aggregated_spend_mv
TO daily_aggregated_spend
AS
SELECT
toDate(startTime) as day,
sumState(spend) AS DailySpend
FROM spend_logs
GROUP BY
day
"""
)
def _create_clickhouse_aggregate_tables(client=None, table_names=[]):
# Basic Logging works without this - this is only used for low latency reporting APIs
verbose_logger.debug("Clickhouse: Creating Aggregate Tables")
# Create Aggregate Tables if they don't exist
if "daily_aggregated_spend_per_model" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model")
client.command(
"""
CREATE TABLE daily_aggregated_spend_per_model
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
`model` String
)
ENGINE = SummingMergeTree()
ORDER BY (day, model);
"""
)
if "daily_aggregated_spend_per_api_key" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_api_key")
client.command(
"""
CREATE TABLE daily_aggregated_spend_per_api_key
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
`api_key` String
)
ENGINE = SummingMergeTree()
ORDER BY (day, api_key);
"""
)
if "daily_aggregated_spend_per_user" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user")
client.command(
"""
CREATE TABLE daily_aggregated_spend_per_user
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64),
`user` String
)
ENGINE = SummingMergeTree()
ORDER BY (day, user);
"""
)
if "daily_aggregated_spend" not in table_names:
verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend")
client.command(
"""
CREATE TABLE daily_aggregated_spend
(
`day` Date,
`DailySpend` AggregateFunction(sum, Float64)
)
ENGINE = SummingMergeTree()
ORDER BY (day);
"""
)
return
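# Reading the data back (sketch): the AggregateFunction state that the
# materialized views write into these tables is finalized with sumMerge at
# query time, e.g. (assuming a clickhouse_connect client):
#   client.query_df(
#       "SELECT day, sumMerge(DailySpend) AS daily_spend "
#       "FROM daily_aggregated_spend GROUP BY day ORDER BY day"
#   )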

View file

@ -549,6 +549,8 @@ from .utils import (
token_counter,
cost_per_token,
completion_cost,
supports_function_calling,
supports_parallel_function_calling,
get_litellm_params,
Logging,
acreate,

View file

@ -27,6 +27,151 @@ import litellm, uuid
from litellm._logging import print_verbose, verbose_logger
def create_client():
try:
import clickhouse_connect
port = os.getenv("CLICKHOUSE_PORT")
clickhouse_host = os.getenv("CLICKHOUSE_HOST")
if clickhouse_host is not None:
verbose_logger.debug("setting up clickhouse")
if port is not None and isinstance(port, str):
port = int(port)
client = clickhouse_connect.get_client(
host=os.getenv("CLICKHOUSE_HOST"),
port=port,
username=os.getenv("CLICKHOUSE_USERNAME"),
password=os.getenv("CLICKHOUSE_PASSWORD"),
)
return client
else:
raise Exception("Clickhouse: Clickhouse host not set")
except Exception as e:
raise ValueError(f"Clickhouse: {e}")
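# Example env for create_client() (sketch; 8123 is ClickHouse's default HTTP port):
#   CLICKHOUSE_HOST=localhost CLICKHOUSE_PORT=8123
#   CLICKHOUSE_USERNAME=default CLICKHOUSE_PASSWORD=...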
def build_daily_metrics():
click_house_client = create_client()
# get daily spend
daily_spend = click_house_client.query_df(
"""
SELECT sumMerge(DailySpend) as daily_spend, day FROM daily_aggregated_spend GROUP BY day
"""
)
# get daily spend per model
daily_spend_per_model = click_house_client.query_df(
"""
SELECT sumMerge(DailySpend) as daily_spend, day, model FROM daily_aggregated_spend_per_model GROUP BY day, model
"""
)
new_df = daily_spend_per_model.to_dict(orient="records")
import pandas as pd
df = pd.DataFrame(new_df)
# Group by 'day' and create a dictionary for each group
result_dict = {}
for day, group in df.groupby("day"):
models = group["model"].tolist()
spend = group["daily_spend"].tolist()
spend_per_model = {model: spend for model, spend in zip(models, spend)}
result_dict[day] = spend_per_model
# get daily spend per API key
daily_spend_per_api_key = click_house_client.query_df(
"""
SELECT
daily_spend,
day,
api_key
FROM (
SELECT
sumMerge(DailySpend) as daily_spend,
day,
api_key,
RANK() OVER (PARTITION BY day ORDER BY sumMerge(DailySpend) DESC) as spend_rank
FROM
daily_aggregated_spend_per_api_key
GROUP BY
day,
api_key
) AS ranked_api_keys
WHERE
spend_rank <= 5
AND day IS NOT NULL
ORDER BY
day,
daily_spend DESC
"""
)
new_df = daily_spend_per_api_key.to_dict(orient="records")
import pandas as pd
df = pd.DataFrame(new_df)
# Group by 'day' and create a dictionary for each group
api_key_result_dict = {}
for day, group in df.groupby("day"):
api_keys = group["api_key"].tolist()
spend = group["daily_spend"].tolist()
spend_per_api_key = {api_key: spend for api_key, spend in zip(api_keys, spend)}
api_key_result_dict[day] = spend_per_api_key
# Calculate total spend across all days
total_spend = daily_spend["daily_spend"].sum()
# Identify top models and top API keys with the highest spend across all days
top_models = {}
top_api_keys = {}
for day, spend_per_model in result_dict.items():
for model, model_spend in spend_per_model.items():
if model not in top_models or model_spend > top_models[model]:
top_models[model] = model_spend
for day, spend_per_api_key in api_key_result_dict.items():
for api_key, api_key_spend in spend_per_api_key.items():
if api_key not in top_api_keys or api_key_spend > top_api_keys[api_key]:
top_api_keys[api_key] = api_key_spend
# for each day in daily spend, look up the day in result_dict and api_key_result_dict
# Assuming daily_spend DataFrame has 'day' column
result = []
for index, row in daily_spend.iterrows():
day = row["day"]
data_day = row.to_dict()
# Look up in result_dict
if day in result_dict:
spend_per_model = result_dict[day]
# Assuming there is a column named 'model' in daily_spend
data_day["spend_per_model"] = spend_per_model # Assign 0 if model not found
# Look up in api_key_result_dict
if day in api_key_result_dict:
spend_per_api_key = api_key_result_dict[day]
# Assuming there is a column named 'api_key' in daily_spend
data_day["spend_per_api_key"] = spend_per_api_key
result.append(data_day)
data_to_return = {}
data_to_return["daily_spend"] = result
data_to_return["total_spend"] = total_spend
data_to_return["top_models"] = top_models
data_to_return["top_api_keys"] = top_api_keys
return data_to_return
# build_daily_metrics()
def _start_clickhouse():
import clickhouse_connect
@ -86,6 +231,14 @@ def _start_clickhouse():
response = client.query("DESCRIBE default.spend_logs")
verbose_logger.debug(f"spend logs schema ={response.result_rows}")
# RUN Enterprise Clickhouse Setup
# TLDR: For Enterprise - we create views / aggregate tables for low latency reporting APIs
from litellm.proxy.enterprise.utils import _create_clickhouse_aggregate_tables
from litellm.proxy.enterprise.utils import _create_clickhouse_material_views
_create_clickhouse_aggregate_tables(client=client, table_names=table_names)
_create_clickhouse_material_views(client=client, table_names=table_names)
class ClickhouseLogger:
# Class variables or attributes

View file

@ -278,7 +278,11 @@ def completion(
import google.auth
## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
print_verbose(
f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
)
creds, _ = google.auth.default(quota_project_id=vertex_project)
print_verbose(f"VERTEX AI: creds={creds}")
vertexai.init(
project=vertex_project, location=vertex_location, credentials=creds
)

View file

@ -10,7 +10,6 @@
import os, openai, sys, json, inspect, uuid, datetime, threading
from typing import Any, Literal, Union
from functools import partial
import dotenv, traceback, random, asyncio, time, contextvars
from copy import deepcopy
import httpx
@ -1468,12 +1467,14 @@ def completion(
response = model_response
elif custom_llm_provider == "vertex_ai":
vertex_ai_project = (
optional_params.pop("vertex_ai_project", None)
optional_params.pop("vertex_project", None)
or optional_params.pop("vertex_ai_project", None)
or litellm.vertex_project
or get_secret("VERTEXAI_PROJECT")
)
vertex_ai_location = (
optional_params.pop("vertex_ai_location", None)
optional_params.pop("vertex_location", None)
or optional_params.pop("vertex_ai_location", None)
or litellm.vertex_location
or get_secret("VERTEXAI_LOCATION")
)
@ -2567,12 +2568,14 @@ def embedding(
)
elif custom_llm_provider == "vertex_ai":
vertex_ai_project = (
optional_params.pop("vertex_ai_project", None)
optional_params.pop("vertex_project", None)
or optional_params.pop("vertex_ai_project", None)
or litellm.vertex_project
or get_secret("VERTEXAI_PROJECT")
)
vertex_ai_location = (
optional_params.pop("vertex_ai_location", None)
optional_params.pop("vertex_location", None)
or optional_params.pop("vertex_ai_location", None)
or litellm.vertex_location
or get_secret("VERTEXAI_LOCATION")
)

View file

@ -6,7 +6,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gpt-4-turbo-preview": {
"max_tokens": 8192,
@ -15,7 +16,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-4-0314": {
"max_tokens": 8192,
@ -33,7 +36,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gpt-4-32k": {
"max_tokens": 32768,
@ -69,7 +73,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-4-0125-preview": {
"max_tokens": 128000,
@ -78,7 +84,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-4-vision-preview": {
"max_tokens": 128000,
@ -105,7 +113,8 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4097,
@ -123,7 +132,8 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gpt-3.5-turbo-1106": {
"max_tokens": 16385,
@ -132,7 +142,9 @@
"input_cost_per_token": 0.0000010,
"output_cost_per_token": 0.0000020,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-3.5-turbo-0125": {
"max_tokens": 16385,
@ -141,7 +153,9 @@
"input_cost_per_token": 0.0000005,
"output_cost_per_token": 0.0000015,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16385,
@ -286,7 +300,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-4-1106-preview": {
"max_tokens": 128000,
@ -295,7 +311,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-4-0613": {
"max_tokens": 8192,
@ -304,7 +322,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"azure/gpt-4-32k-0613": {
"max_tokens": 32768,
@ -331,7 +350,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"azure/gpt-4-turbo": {
"max_tokens": 128000,
@ -340,7 +360,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-4-turbo-vision-preview": {
"max_tokens": 128000,
@ -358,7 +380,8 @@
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"azure/gpt-35-turbo-1106": {
"max_tokens": 16384,
@ -367,7 +390,20 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-35-turbo-0125": {
"max_tokens": 16384,
"max_input_tokens": 16384,
"max_output_tokens": 4096,
"input_cost_per_token": 0.0000005,
"output_cost_per_token": 0.0000015,
"litellm_provider": "azure",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-35-turbo-16k": {
"max_tokens": 16385,
@ -385,7 +421,8 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"azure/ada": {
"max_tokens": 8191,
@ -514,11 +551,12 @@
"mode": "chat"
},
"mistral/mistral-large-latest": {
"max_tokens": 8192,
"max_tokens": 32000,
"input_cost_per_token": 0.000008,
"output_cost_per_token": 0.000024,
"litellm_provider": "mistral",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"mistral/mistral-embed": {
"max_tokens": 8192,
@ -676,7 +714,8 @@
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gemini-1.5-pro": {
"max_tokens": 8192,
@ -1738,6 +1777,23 @@
"output_cost_per_token": 0.0000009,
"litellm_provider": "together_ai"
},
"together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": {
"input_cost_per_token": 0.0000006,
"output_cost_per_token": 0.0000006,
"litellm_provider": "together_ai",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"together_ai/mistralai/Mistral-7B-Instruct-v0.1": {
"litellm_provider": "together_ai",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"together_ai/togethercomputer/CodeLlama-34b-Instruct": {
"litellm_provider": "together_ai",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"ollama/llama2": {
"max_tokens": 4096,
"input_cost_per_token": 0.0,
@ -1990,7 +2046,16 @@
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "anyscale",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"anyscale/Mixtral-8x7B-Instruct-v0.1": {
"max_tokens": 16384,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "anyscale",
"mode": "chat",
"supports_function_calling": true
},
"anyscale/HuggingFaceH4/zephyr-7b-beta": {
"max_tokens": 16384,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/6920a121699cde9c.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/a40ad0909dd7838e.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-12184ee6a95c1363.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-12184ee6a95c1363.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/a40ad0909dd7838e.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[30280,[\"303\",\"static/chunks/303-d80f23087a9e6aec.js\",\"931\",\"static/chunks/app/page-8f65fc157f538dff.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/a40ad0909dd7838e.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"kyOCJPBB9pyUfbMKCAXr-\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[30280,["303","static/chunks/303-d80f23087a9e6aec.js","931","static/chunks/app/page-8f65fc157f538dff.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["eSwVwl_InIrhYtCAqDMKF",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/6920a121699cde9c.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["kyOCJPBB9pyUfbMKCAXr-",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/a40ad0909dd7838e.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -221,12 +221,19 @@ class NewUserResponse(GenerateKeyResponse):
class UpdateUserRequest(GenerateRequestBase):
# Note: the defaults of all Params here MUST BE NONE
# else they will get overwritten
user_id: Optional[str] = None
user_email: Optional[str] = None
spend: Optional[float] = None
metadata: Optional[dict] = None
user_role: Optional[str] = None
max_budget: Optional[float] = None
@root_validator(pre=True)
def check_user_info(cls, values):
if values.get("user_id") is None and values.get("user_email") is None:
raise ValueError("Either user id or user email must be provided")
return values
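# Example (sketch) of the validator's behavior:
#   UpdateUserRequest(user_email="user@example.com")  # ok
#   UpdateUserRequest(spend=1.0)  # raises ValueError("Either user id or user email must be provided")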
class Member(LiteLLMBase):
role: Literal["admin", "user"]
@ -402,6 +409,9 @@ class ConfigGeneralSettings(LiteLLMBase):
None,
description="sends alerts if requests hang for 5min+",
)
ui_access_mode: Optional[Literal["admin_only", "all"]] = Field(
"all", description="Control access to the Proxy UI"
)
class ConfigYAML(LiteLLMBase):

View file

@ -0,0 +1,66 @@
from litellm.integrations.custom_logger import CustomLogger
import litellm
# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
class MyCustomHandler(CustomLogger):
def log_pre_api_call(self, model, messages, kwargs):
print(f"Pre-API Call") # noqa
def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
print(f"Post-API Call") # noqa
def log_stream_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Stream") # noqa
def log_success_event(self, kwargs, response_obj, start_time, end_time):
print("On Success") # noqa
def log_failure_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Failure") # noqa
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
print(f"ishaan async_log_success_event") # noqa
# log: key, user, model, prompt, response, tokens, cost
# Access kwargs passed to litellm.completion()
model = kwargs.get("model", None)
messages = kwargs.get("messages", None)
user = kwargs.get("user", None)
# Access litellm_params passed to litellm.completion(), example access `metadata`
litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get(
"metadata", {}
) # headers passed to LiteLLM proxy, can be found here
return
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
try:
print(f"On Async Failure !") # noqa
print("\nkwargs", kwargs) # noqa
# Access kwargs passed to litellm.completion()
model = kwargs.get("model", None)
messages = kwargs.get("messages", None)
user = kwargs.get("user", None)
# Access litellm_params passed to litellm.completion(), example access `metadata`
litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get(
"metadata", {}
) # headers passed to LiteLLM proxy, can be found here
# Access Exceptions & Traceback
exception_event = kwargs.get("exception", None)
traceback_event = kwargs.get("traceback_exception", None)
# Calculate cost using litellm.completion_cost()
except Exception as e:
print(f"Exception: {e}") # noqa
proxy_handler_instance = MyCustomHandler()
# Set litellm.callbacks = [proxy_handler_instance] on the proxy

View file

@ -45,7 +45,7 @@ litellm_settings:
fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
success_callback: ['langfuse']
# setting callback class
callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
general_settings:
master_key: sk-1234

View file

@ -239,6 +239,9 @@ health_check_interval = None
health_check_results = {}
queue: List = []
litellm_proxy_budget_name = "litellm-proxy-budget"
ui_access_mode: Literal["admin_only", "all"] = "all"
proxy_budget_rescheduler_min_time = 597
proxy_budget_rescheduler_max_time = 605
### INITIALIZE GLOBAL LOGGING OBJECT ###
proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache)
### REDIS QUEUE ###
@ -1406,7 +1409,7 @@ class ProxyConfig:
"""
Load config values into proxy global state
"""
global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client, proxy_budget_rescheduler_max_time, proxy_budget_rescheduler_min_time, ui_access_mode
# Load existing config
config = await self.get_config(config_file_path=config_file_path)
@ -1713,6 +1716,17 @@ class ProxyConfig:
)
## COST TRACKING ##
cost_tracking()
## ADMIN UI ACCESS ##
ui_access_mode = general_settings.get(
"ui_access_mode", "all"
) # can be either "admin_only" or "all"
## BUDGET RESCHEDULER ##
proxy_budget_rescheduler_min_time = general_settings.get(
"proxy_budget_rescheduler_min_time", proxy_budget_rescheduler_min_time
)
proxy_budget_rescheduler_max_time = general_settings.get(
"proxy_budget_rescheduler_max_time", proxy_budget_rescheduler_max_time
)
### BACKGROUND HEALTH CHECKS ###
# Enable background health checks
use_background_health_checks = general_settings.get(
@ -2115,10 +2129,9 @@ async def async_data_generator(response, user_api_key_dict):
try:
start_time = time.time()
async for chunk in response:
verbose_proxy_logger.debug(f"returned chunk: {chunk}")
assert isinstance(chunk, litellm.ModelResponse)
chunk = chunk.model_dump_json(exclude_none=True)
try:
yield f"data: {json.dumps(chunk.model_dump(exclude_none=True))}\n\n"
yield f"data: {chunk}\n\n"
except Exception as e:
yield f"data: {str(e)}\n\n"
@ -2197,7 +2210,7 @@ def parse_cache_control(cache_control):
@router.on_event("startup")
async def startup_event():
global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time
import json
### LOAD MASTER KEY ###
@ -2302,13 +2315,12 @@ async def startup_event():
### CHECK IF VIEW EXISTS ###
if prisma_client is not None:
create_view_response = await prisma_client.check_view_exists()
print(f"create_view_response: {create_view_response}") # noqa
### START BUDGET SCHEDULER ###
if prisma_client is not None:
scheduler = AsyncIOScheduler()
interval = random.randint(
proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time
) # random interval, so multiple workers avoid resetting budget at the same time
scheduler.add_job(
reset_budget, "interval", seconds=interval, args=[prisma_client]
@ -3775,7 +3787,7 @@ async def view_spend_tags(
@router.get(
"/spend/logs",
tags=["budget & spend Tracking"],
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
responses={
200: {"model": List[LiteLLM_SpendLogs]},
@ -3834,13 +3846,55 @@ async def view_spend_logs(
# getting spend logs from clickhouse
from litellm.proxy.enterprise.utils import view_spend_logs_from_clickhouse
daily_metrics = await view_daily_metrics(
start_date=start_date,
end_date=end_date,
)
# get the top api keys across all daily_metrics
top_api_keys = {} # type: ignore
# make this compatible with the admin UI
for response in daily_metrics.get("daily_spend", {}):
response["startTime"] = response["day"]
response["spend"] = response["daily_spend"]
response["models"] = response["spend_per_model"]
response["users"] = {"ishaan": 0.0}
spend_per_api_key = response["spend_per_api_key"]
# insert spend_per_api_key key, values in response
for key, value in spend_per_api_key.items():
response[key] = value
top_api_keys[key] = top_api_keys.get(key, 0.0) + value
del response["day"]
del response["daily_spend"]
del response["spend_per_model"]
del response["spend_per_api_key"]
# get top 5 api keys
top_api_keys = sorted(top_api_keys.items(), key=lambda x: x[1], reverse=True) # type: ignore
top_api_keys = top_api_keys[:5] # type: ignore
top_api_keys = dict(top_api_keys) # type: ignore
"""
set it like this
{
"key" : key,
"spend:" : spend
}
"""
# we need this to show on the Admin UI
response_keys = []
for key in top_api_keys.items():
response_keys.append(
{
"key": key[0],
"spend": key[1],
}
)
daily_metrics["top_api_keys"] = response_keys
return daily_metrics
global prisma_client
try:
verbose_proxy_logger.debug("inside view_spend_logs")
@ -3993,6 +4047,142 @@ async def view_spend_logs(
)
@router.get(
"/global/spend/logs",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
)
async def global_spend_logs():
"""
[BETA] This is a beta endpoint. It will change.
Use this to get global spend (spend per day for last 30d). Admin-only endpoint
More efficient implementation of /spend/logs, by creating a view over the spend logs table.
"""
global prisma_client
sql_query = """SELECT * FROM "MonthlyGlobalSpend";"""
response = await prisma_client.db.query_raw(query=sql_query)
return response
@router.get(
"/global/spend/keys",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
)
async def global_spend_keys(
limit: int = fastapi.Query(
default=None,
description="Number of keys to get. Will return Top 'n' keys.",
)
):
"""
[BETA] This is a beta endpoint. It will change.
Use this to get the top 'n' keys with the highest spend, ordered by spend.
"""
global prisma_client
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
sql_query = f"""SELECT * FROM "Last30dKeysBySpend" LIMIT {limit};"""
response = await prisma_client.db.query_raw(query=sql_query)
return response
@router.get(
"/global/spend/models",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
)
async def global_spend_models(
limit: int = fastapi.Query(
default=None,
description="Number of models to get. Will return Top 'n' models.",
)
):
"""
[BETA] This is a beta endpoint. It will change.
Use this to get the top 'n' models with the highest spend, ordered by spend.
"""
global prisma_client
if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"})
sql_query = f"""SELECT * FROM "Last30dModelsBySpend" LIMIT {limit};"""
response = await prisma_client.db.query_raw(query=sql_query)
return response
@router.get(
"/daily_metrics",
summary="Get daily spend metrics",
tags=["budget & spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
)
async def view_daily_metrics(
start_date: Optional[str] = fastapi.Query(
default=None,
description="Time from which to start viewing key spend",
),
end_date: Optional[str] = fastapi.Query(
default=None,
description="Time till which to view key spend",
),
):
"""
[BETA] This is a beta endpoint. It might change without notice.
Please give feedback - https://github.com/BerriAI/litellm/issues
"""
try:
if os.getenv("CLICKHOUSE_HOST") is not None:
# getting spend logs from clickhouse
from litellm.integrations import clickhouse
return clickhouse.build_daily_metrics()
# create a response object
"""
{
"date": "2022-01-01",
"spend": 0.0,
"users": {},
"models": {},
}
"""
else:
raise Exception(
"Clickhouse: Clickhouse host not set. Required for viewing /daily/metrics"
)
except Exception as e:
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"/spend/logs Error({str(e)})"),
type="internal_error",
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
)
elif isinstance(e, ProxyException):
raise e
raise ProxyException(
message="/spend/logs Error" + str(e),
type="internal_error",
param=getattr(e, "param", "None"),
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
)
#### USER MANAGEMENT ####
@router.post(
"/user/new",
@ -4264,12 +4454,32 @@ async def user_update(data: UpdateUserRequest):
): # models default to [], spend defaults to 0, we should not reset these values
non_default_values[k] = v
## ADD USER, IF NEW ##
if data.user_id is not None and len(data.user_id) > 0:
non_default_values["user_id"] = data.user_id # type: ignore
await prisma_client.update_data(
user_id=data.user_id,
data=non_default_values,
table_name="user",
)
elif data.user_email is not None:
non_default_values["user_id"] = str(uuid.uuid4())
non_default_values["user_email"] = data.user_email
## user email is not unique according to the prisma schema -> future improvement
### for now: check if it exists in db, if not - insert it
existing_user_row = await prisma_client.get_data(
key_val={"user_email": data.user_email},
table_name="user",
query_type="find_all",
)
if existing_user_row is None or (
isinstance(existing_user_row, list) and len(existing_user_row) == 0
):
await prisma_client.insert_data(
data=non_default_values, table_name="user"
)
return non_default_values
# update based on remaining passed in values
except Exception as e:
traceback.print_exc()
@ -4472,6 +4682,40 @@ async def unblock_user(data: BlockUsers):
return {"blocked_users": litellm.blocked_user_list}
@router.get(
"/user/get_users",
tags=["user management"],
dependencies=[Depends(user_api_key_auth)],
)
async def get_users(
role: str = fastapi.Query(
default=None,
description="Either 'proxy_admin', 'proxy_viewer', 'app_owner', 'app_user'",
)
):
"""
[BETA] This could change without notice. Give feedback - https://github.com/BerriAI/litellm/issues
Get all users who are a specific `user_role`.
Used by the UI to populate the user lists.
Currently - admin-only endpoint.
"""
global prisma_client
if prisma_client is None:
raise HTTPException(
status_code=500,
detail={"error": f"No db connected. prisma client={prisma_client}"},
)
all_users = await prisma_client.get_data(
table_name="user", query_type="find_all", key_val={"user_role": role}
)
return all_users
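# Example request (sketch; role values per the Query description above):
#   curl -X GET "http://0.0.0.0:4000/user/get_users?role=proxy_admin" \
#        -H "Authorization: Bearer sk-1234"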
#### TEAM MANAGEMENT ####
@ -4621,9 +4865,9 @@ async def update_team(
):
"""
[BETA]
[DEPRECATED] - use the `/team/member_add` and `/team/member_remove` endpoints instead
[RECOMMENDED] - use `/team/member_add` to add new team members instead
You can now add / delete users from a team via /team/update
You can now update team budget / rate limits via /team/update
```
curl --location 'http://0.0.0.0:8000/team/update' \
@ -5620,7 +5864,7 @@ def get_image():
@app.get("/sso/callback", tags=["experimental"])
async def auth_callback(request: Request):
"""Verify login"""
global general_settings
global general_settings, ui_access_mode
microsoft_client_id = os.getenv("MICROSOFT_CLIENT_ID", None)
google_client_id = os.getenv("GOOGLE_CLIENT_ID", None)
generic_client_id = os.getenv("GENERIC_CLIENT_ID", None)
@ -5811,6 +6055,7 @@ async def auth_callback(request: Request):
"user_email": user_email,
}
try:
user_role = None
if prisma_client is not None:
user_info = await prisma_client.get_data(user_id=user_id, table_name="user")
verbose_proxy_logger.debug(
@ -5822,6 +6067,7 @@ async def auth_callback(request: Request):
"user_id": getattr(user_info, "user_id", user_id),
"user_email": getattr(user_info, "user_id", user_email),
}
user_role = getattr(user_info, "user_role", None)
elif litellm.default_user_params is not None and isinstance(
litellm.default_user_params, dict
):
@ -5844,13 +6090,27 @@ async def auth_callback(request: Request):
key = response["token"] # type: ignore
user_id = response["user_id"] # type: ignore
litellm_dashboard_ui = "/ui/"
user_role = "app_owner"
user_role = user_role or "app_owner"
if (
os.getenv("PROXY_ADMIN_ID", None) is not None
and os.environ["PROXY_ADMIN_ID"] == user_id
):
# checks if user is admin
user_role = "app_admin"
verbose_proxy_logger.debug(
f"user_role: {user_role}; ui_access_mode: {ui_access_mode}"
)
## CHECK IF ROLE ALLOWED TO USE PROXY ##
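# note: the substring check below matches both "app_admin" and "proxy_admin" roles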
if ui_access_mode == "admin_only" and "admin" not in user_role:
verbose_proxy_logger.debug("EXCEPTION RAISED")
raise HTTPException(
status_code=401,
detail={
"error": f"User not allowed to access proxy. User role={user_role}, proxy mode={ui_access_mode}"
},
)
import jwt
jwt_token = jwt.encode(

View file

@ -489,18 +489,20 @@ class PrismaClient:
)
async def check_view_exists(self):
"""
Checks if the LiteLLM_VerificationTokenView exists in the user's db.
Checks if the LiteLLM_VerificationTokenView and MonthlyGlobalSpend views exist in the user's db.
This is used for getting the token + team data in user_api_key_auth
LiteLLM_VerificationTokenView: This view is used for getting the token + team data in user_api_key_auth
MonthlyGlobalSpend: This view is used for the admin view to see global spend for this month
If the view doesn't exist, one will be created.
"""
try:
# Try to select one row from the view
await self.db.execute_raw(
await self.db.query_raw(
"""SELECT 1 FROM "LiteLLM_VerificationTokenView" LIMIT 1"""
)
return "LiteLLM_VerificationTokenView Exists!"
print("LiteLLM_VerificationTokenView Exists!") # noqa
except Exception as e:
# If an error occurs, the view does not exist, so create it
value = await self.health_check()
@ -518,7 +520,29 @@ class PrismaClient:
"""
)
return "LiteLLM_VerificationTokenView Created!"
print("LiteLLM_VerificationTokenView Created!") # noqa
try:
await self.db.query_raw("""SELECT 1 FROM "MonthlyGlobalSpend" LIMIT 1""")
print("MonthlyGlobalSpend Exists!") # noqa
except Exception as e:
sql_query = """
CREATE OR REPLACE VIEW "MonthlyGlobalSpend" AS
SELECT
DATE("startTime") AS date,
SUM("spend") AS spend
FROM
"LiteLLM_SpendLogs"
WHERE
"startTime" >= (CURRENT_DATE - INTERVAL '30 days')
GROUP BY
DATE("startTime");
"""
await self.db.execute_raw(query=sql_query)
print("MonthlyGlobalSpend Created!") # noqa
return
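# Example (sketch): with the view in place, a per-day spend query is simply
#   SELECT date, spend FROM "MonthlyGlobalSpend" ORDER BY date;
# (column names per the CREATE VIEW statement above)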
@backoff.on_exception(
backoff.expo,

View file

@ -1,253 +1,254 @@
import sys
import os
import io, asyncio
## @pytest.mark.skip(reason="AWS Suspended Account")
# import sys
# import os
# import io, asyncio
# import logging
# logging.basicConfig(level=logging.DEBUG)
sys.path.insert(0, os.path.abspath("../.."))
# # import logging
# # logging.basicConfig(level=logging.DEBUG)
# sys.path.insert(0, os.path.abspath("../.."))
from litellm import completion
import litellm
# from litellm import completion
# import litellm
litellm.num_retries = 3
# litellm.num_retries = 3
import time, random
import pytest
# import time, random
# import pytest
def test_s3_logging():
# all s3 requests need to be in one test function
# since we are modifying stdout, and pytests runs tests in parallel
# on circle ci - we only test litellm.acompletion()
try:
# redirect stdout to log_file
litellm.cache = litellm.Cache(
type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2"
)
# def test_s3_logging():
# # all s3 requests need to be in one test function
# # since we are modifying stdout, and pytests runs tests in parallel
# # on circle ci - we only test litellm.acompletion()
# try:
# # redirect stdout to log_file
# litellm.cache = litellm.Cache(
# type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2"
# )
litellm.success_callback = ["s3"]
litellm.s3_callback_params = {
"s3_bucket_name": "litellm-logs",
"s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
"s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
}
litellm.set_verbose = True
# litellm.success_callback = ["s3"]
# litellm.s3_callback_params = {
# "s3_bucket_name": "litellm-logs",
# "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
# "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
# }
# litellm.set_verbose = True
print("Testing async s3 logging")
# print("Testing async s3 logging")
expected_keys = []
# expected_keys = []
import time
# import time
curr_time = str(time.time())
# curr_time = str(time.time())
async def _test():
return await litellm.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
max_tokens=10,
temperature=0.7,
user="ishaan-2",
)
# async def _test():
# return await litellm.acompletion(
# model="gpt-3.5-turbo",
# messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
# max_tokens=10,
# temperature=0.7,
# user="ishaan-2",
# )
response = asyncio.run(_test())
print(f"response: {response}")
expected_keys.append(response.id)
# response = asyncio.run(_test())
# print(f"response: {response}")
# expected_keys.append(response.id)
async def _test():
return await litellm.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
max_tokens=10,
temperature=0.7,
user="ishaan-2",
)
# async def _test():
# return await litellm.acompletion(
# model="gpt-3.5-turbo",
# messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
# max_tokens=10,
# temperature=0.7,
# user="ishaan-2",
# )
response = asyncio.run(_test())
expected_keys.append(response.id)
print(f"response: {response}")
time.sleep(5) # wait 5s for logs to land
# response = asyncio.run(_test())
# expected_keys.append(response.id)
# print(f"response: {response}")
# time.sleep(5) # wait 5s for logs to land
import boto3
# import boto3
s3 = boto3.client("s3")
bucket_name = "litellm-logs"
# List objects in the bucket
response = s3.list_objects(Bucket=bucket_name)
# s3 = boto3.client("s3")
# bucket_name = "litellm-logs"
# # List objects in the bucket
# response = s3.list_objects(Bucket=bucket_name)
# Sort the objects based on the LastModified timestamp
objects = sorted(
response["Contents"], key=lambda x: x["LastModified"], reverse=True
)
# Get the keys of the most recent objects
most_recent_keys = [obj["Key"] for obj in objects]
print(most_recent_keys)
# for each key, get the part before "-" as the key. Do it safely
cleaned_keys = []
for key in most_recent_keys:
split_key = key.split("_")
if len(split_key) < 2:
continue
cleaned_keys.append(split_key[1])
print("\n most recent keys", most_recent_keys)
print("\n cleaned keys", cleaned_keys)
print("\n Expected keys: ", expected_keys)
matches = 0
for key in expected_keys:
key += ".json"
assert key in cleaned_keys
# # Sort the objects based on the LastModified timestamp
# objects = sorted(
# response["Contents"], key=lambda x: x["LastModified"], reverse=True
# )
# # Get the keys of the most recent objects
# most_recent_keys = [obj["Key"] for obj in objects]
# print(most_recent_keys)
# # for each key, get the part before "-" as the key. Do it safely
# cleaned_keys = []
# for key in most_recent_keys:
# split_key = key.split("_")
# if len(split_key) < 2:
# continue
# cleaned_keys.append(split_key[1])
# print("\n most recent keys", most_recent_keys)
# print("\n cleaned keys", cleaned_keys)
# print("\n Expected keys: ", expected_keys)
# matches = 0
# for key in expected_keys:
# key += ".json"
# assert key in cleaned_keys
if key in cleaned_keys:
matches += 1
# remove the match key
cleaned_keys.remove(key)
# this asserts we log, the first request + the 2nd cached request
print("we had two matches ! passed ", matches)
assert matches == 2
try:
# cleanup s3 bucket in test
for key in most_recent_keys:
s3.delete_object(Bucket=bucket_name, Key=key)
except:
# don't let cleanup fail a test
pass
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
finally:
# post, close log file and verify
# Reset stdout to the original value
print("Passed! Testing async s3 logging")
# if key in cleaned_keys:
# matches += 1
# # remove the match key
# cleaned_keys.remove(key)
# # this asserts we log, the first request + the 2nd cached request
# print("we had two matches ! passed ", matches)
# assert matches == 2
# try:
# # cleanup s3 bucket in test
# for key in most_recent_keys:
# s3.delete_object(Bucket=bucket_name, Key=key)
# except:
# # don't let cleanup fail a test
# pass
# except Exception as e:
# pytest.fail(f"An exception occurred - {e}")
# finally:
# # post, close log file and verify
# # Reset stdout to the original value
# print("Passed! Testing async s3 logging")
# test_s3_logging()
# # test_s3_logging()
def test_s3_logging_async():
# this tests time added to make s3 logging calls, vs just acompletion calls
try:
litellm.set_verbose = True
# Make 5 calls with an empty success_callback
litellm.success_callback = []
start_time_empty_callback = asyncio.run(make_async_calls())
print("done with no callback test")
# def test_s3_logging_async():
# # this tests time added to make s3 logging calls, vs just acompletion calls
# try:
# litellm.set_verbose = True
# # Make 5 calls with an empty success_callback
# litellm.success_callback = []
# start_time_empty_callback = asyncio.run(make_async_calls())
# print("done with no callback test")
print("starting s3 logging load test")
# Make 5 calls with success_callback set to "langfuse"
litellm.success_callback = ["s3"]
litellm.s3_callback_params = {
"s3_bucket_name": "litellm-logs",
"s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
"s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
}
start_time_s3 = asyncio.run(make_async_calls())
print("done with s3 test")
# print("starting s3 logging load test")
# # Make 5 calls with success_callback set to "langfuse"
# litellm.success_callback = ["s3"]
# litellm.s3_callback_params = {
# "s3_bucket_name": "litellm-logs",
# "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
# "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
# }
# start_time_s3 = asyncio.run(make_async_calls())
# print("done with s3 test")
# Compare the time for both scenarios
print(f"Time taken with success_callback='s3': {start_time_s3}")
print(f"Time taken with empty success_callback: {start_time_empty_callback}")
# # Compare the time for both scenarios
# print(f"Time taken with success_callback='s3': {start_time_s3}")
# print(f"Time taken with empty success_callback: {start_time_empty_callback}")
# assert the diff is not more than 1 second
assert abs(start_time_s3 - start_time_empty_callback) < 1
# # assert the diff is not more than 1 second
# assert abs(start_time_s3 - start_time_empty_callback) < 1
except litellm.Timeout as e:
pass
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
# except litellm.Timeout as e:
# pass
# except Exception as e:
# pytest.fail(f"An exception occurred - {e}")
async def make_async_calls():
tasks = []
for _ in range(5):
task = asyncio.create_task(
litellm.acompletion(
model="azure/chatgpt-v-2",
messages=[{"role": "user", "content": "This is a test"}],
max_tokens=5,
temperature=0.7,
timeout=5,
user="langfuse_latency_test_user",
mock_response="It's simple to use and easy to get started",
)
)
tasks.append(task)
# async def make_async_calls():
# tasks = []
# for _ in range(5):
# task = asyncio.create_task(
# litellm.acompletion(
# model="azure/chatgpt-v-2",
# messages=[{"role": "user", "content": "This is a test"}],
# max_tokens=5,
# temperature=0.7,
# timeout=5,
# user="langfuse_latency_test_user",
# mock_response="It's simple to use and easy to get started",
# )
# )
# tasks.append(task)
# Measure the start time before running the tasks
start_time = asyncio.get_event_loop().time()
# # Measure the start time before running the tasks
# start_time = asyncio.get_event_loop().time()
# Wait for all tasks to complete
responses = await asyncio.gather(*tasks)
# # Wait for all tasks to complete
# responses = await asyncio.gather(*tasks)
# Print the responses when tasks return
for idx, response in enumerate(responses):
print(f"Response from Task {idx + 1}: {response}")
# # Print the responses when tasks return
# for idx, response in enumerate(responses):
# print(f"Response from Task {idx + 1}: {response}")
# Calculate the total time taken
total_time = asyncio.get_event_loop().time() - start_time
# # Calculate the total time taken
# total_time = asyncio.get_event_loop().time() - start_time
return total_time
# return total_time
def test_s3_logging_r2():
# all s3 requests need to be in one test function
# since we are modifying stdout, and pytests runs tests in parallel
# on circle ci - we only test litellm.acompletion()
try:
# redirect stdout to log_file
# litellm.cache = litellm.Cache(
# type="s3", s3_bucket_name="litellm-r2-bucket", s3_region_name="us-west-2"
# )
litellm.set_verbose = True
from litellm._logging import verbose_logger
import logging
# def test_s3_logging_r2():
# # all s3 requests need to be in one test function
# # since we are modifying stdout, and pytests runs tests in parallel
# # on circle ci - we only test litellm.acompletion()
# try:
# # redirect stdout to log_file
# # litellm.cache = litellm.Cache(
# # type="s3", s3_bucket_name="litellm-r2-bucket", s3_region_name="us-west-2"
# # )
# litellm.set_verbose = True
# from litellm._logging import verbose_logger
# import logging
verbose_logger.setLevel(level=logging.DEBUG)
# verbose_logger.setLevel(level=logging.DEBUG)
litellm.success_callback = ["s3"]
litellm.s3_callback_params = {
"s3_bucket_name": "litellm-r2-bucket",
"s3_aws_secret_access_key": "os.environ/R2_S3_ACCESS_KEY",
"s3_aws_access_key_id": "os.environ/R2_S3_ACCESS_ID",
"s3_endpoint_url": "os.environ/R2_S3_URL",
"s3_region_name": "os.environ/R2_S3_REGION_NAME",
}
print("Testing async s3 logging")
# litellm.success_callback = ["s3"]
# litellm.s3_callback_params = {
# "s3_bucket_name": "litellm-r2-bucket",
# "s3_aws_secret_access_key": "os.environ/R2_S3_ACCESS_KEY",
# "s3_aws_access_key_id": "os.environ/R2_S3_ACCESS_ID",
# "s3_endpoint_url": "os.environ/R2_S3_URL",
# "s3_region_name": "os.environ/R2_S3_REGION_NAME",
# }
# print("Testing async s3 logging")
expected_keys = []
# expected_keys = []
import time
# import time
curr_time = str(time.time())
# curr_time = str(time.time())
async def _test():
return await litellm.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
max_tokens=10,
temperature=0.7,
user="ishaan-2",
)
# async def _test():
# return await litellm.acompletion(
# model="gpt-3.5-turbo",
# messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
# max_tokens=10,
# temperature=0.7,
# user="ishaan-2",
# )
response = asyncio.run(_test())
print(f"response: {response}")
expected_keys.append(response.id)
# response = asyncio.run(_test())
# print(f"response: {response}")
# expected_keys.append(response.id)
import boto3
# import boto3
s3 = boto3.client(
"s3",
endpoint_url=os.getenv("R2_S3_URL"),
region_name=os.getenv("R2_S3_REGION_NAME"),
aws_access_key_id=os.getenv("R2_S3_ACCESS_ID"),
aws_secret_access_key=os.getenv("R2_S3_ACCESS_KEY"),
)
# s3 = boto3.client(
# "s3",
# endpoint_url=os.getenv("R2_S3_URL"),
# region_name=os.getenv("R2_S3_REGION_NAME"),
# aws_access_key_id=os.getenv("R2_S3_ACCESS_ID"),
# aws_secret_access_key=os.getenv("R2_S3_ACCESS_KEY"),
# )
bucket_name = "litellm-r2-bucket"
# List objects in the bucket
response = s3.list_objects(Bucket=bucket_name)
# bucket_name = "litellm-r2-bucket"
# # List objects in the bucket
# response = s3.list_objects(Bucket=bucket_name)
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
finally:
# post, close log file and verify
# Reset stdout to the original value
print("Passed! Testing async s3 logging")
# except Exception as e:
# pytest.fail(f"An exception occurred - {e}")
# finally:
# # post, close log file and verify
# # Reset stdout to the original value
# print("Passed! Testing async s3 logging")

View file

@ -110,6 +110,7 @@ def test_vertex_ai():
"code-bison@001",
"text-bison@001",
"gemini-1.5-pro",
"gemini-1.5-pro-preview-0215",
"gemini-1.5-pro-vision",
]:
# our account does not have access to this model
@ -129,6 +130,8 @@ def test_vertex_ai():
f"response.choices[0].finish_reason: {response.choices[0].finish_reason}"
)
assert response.choices[0].finish_reason in litellm._openai_finish_reasons
except litellm.RateLimitError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
@ -160,6 +163,7 @@ def test_vertex_ai_stream():
"code-bison@001",
"text-bison@001",
"gemini-1.5-pro",
"gemini-1.5-pro-preview-0215",
"gemini-1.5-pro-vision",
]:
# our account does not have access to this model
@ -181,6 +185,8 @@ def test_vertex_ai_stream():
assert type(content) == str
# pass
assert len(completed_str) > 4
except litellm.RateLimitError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
@ -211,6 +217,7 @@ async def test_async_vertexai_response():
"code-bison@001",
"text-bison@001",
"gemini-1.5-pro",
"gemini-1.5-pro-preview-0215",
"gemini-1.5-pro-vision",
]:
# our account does not have access to this model
@ -255,6 +262,7 @@ async def test_async_vertexai_streaming_response():
"code-bison@001",
"text-bison@001",
"gemini-1.5-pro",
"gemini-1.5-pro-preview-0215",
"gemini-1.5-pro-vision",
]:
# our account does not have access to this model

View file

@ -193,16 +193,26 @@ async def test_hf_completion_tgi():
# Add any assertions here to check the response
print(response)
except litellm.APIError as e:
print("got an api error")
pass
except litellm.Timeout as e:
print("got a timeout error")
pass
except litellm.RateLimitError as e:
# this will catch the model is overloaded error
print("got a rate limit error")
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
if "Model is overloaded" in str(e):
pass
else:
pytest.fail(f"Error occurred: {e}")
# test_get_cloudflare_response_streaming()
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_completion_sagemaker():
# litellm.set_verbose=True

View file

@ -1,257 +1,259 @@
import sys, os
import traceback
from dotenv import load_dotenv
# @pytest.mark.skip(reason="AWS Suspended Account")
# import sys, os
# import traceback
# from dotenv import load_dotenv
load_dotenv()
import os, io
# load_dotenv()
# import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion, completion_cost, Timeout
from litellm import RateLimitError
# sys.path.insert(
# 0, os.path.abspath("../..")
# ) # Adds the parent directory to the system path
# import pytest
# import litellm
# from litellm import embedding, completion, completion_cost, Timeout
# from litellm import RateLimitError
# litellm.num_retries = 3
litellm.cache = None
litellm.success_callback = []
user_message = "Write a short poem about the sky"
messages = [{"content": user_message, "role": "user"}]
# # litellm.num_retries = 3
# litellm.cache = None
# litellm.success_callback = []
# user_message = "Write a short poem about the sky"
# messages = [{"content": user_message, "role": "user"}]
@pytest.fixture(autouse=True)
def reset_callbacks():
print("\npytest fixture - resetting callbacks")
litellm.success_callback = []
litellm._async_success_callback = []
litellm.failure_callback = []
litellm.callbacks = []
# @pytest.fixture(autouse=True)
# def reset_callbacks():
# print("\npytest fixture - resetting callbacks")
# litellm.success_callback = []
# litellm._async_success_callback = []
# litellm.failure_callback = []
# litellm.callbacks = []
def test_completion_bedrock_claude_completion_auth():
print("calling bedrock claude completion params auth")
import os
# def test_completion_bedrock_claude_completion_auth():
# print("calling bedrock claude completion params auth")
# import os
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
# aws_region_name = os.environ["AWS_REGION_NAME"]
os.environ.pop("AWS_ACCESS_KEY_ID", None)
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
os.environ.pop("AWS_REGION_NAME", None)
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
# os.environ.pop("AWS_REGION_NAME", None)
try:
response = completion(
model="bedrock/anthropic.claude-instant-v1",
messages=messages,
max_tokens=10,
temperature=0.1,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_region_name=aws_region_name,
)
# Add any assertions here to check the response
print(response)
# try:
# response = completion(
# model="bedrock/anthropic.claude-instant-v1",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_region_name=aws_region_name,
# )
# # Add any assertions here to check the response
# print(response)
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
os.environ["AWS_REGION_NAME"] = aws_region_name
except RateLimitError:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
# os.environ["AWS_REGION_NAME"] = aws_region_name
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_claude_completion_auth()
# # test_completion_bedrock_claude_completion_auth()
def test_completion_bedrock_claude_2_1_completion_auth():
print("calling bedrock claude 2.1 completion params auth")
import os
# def test_completion_bedrock_claude_2_1_completion_auth():
# print("calling bedrock claude 2.1 completion params auth")
# import os
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
# aws_region_name = os.environ["AWS_REGION_NAME"]
os.environ.pop("AWS_ACCESS_KEY_ID", None)
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
os.environ.pop("AWS_REGION_NAME", None)
try:
response = completion(
model="bedrock/anthropic.claude-v2:1",
messages=messages,
max_tokens=10,
temperature=0.1,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_region_name=aws_region_name,
)
# Add any assertions here to check the response
print(response)
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
# os.environ.pop("AWS_REGION_NAME", None)
# try:
# response = completion(
# model="bedrock/anthropic.claude-v2:1",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_region_name=aws_region_name,
# )
# # Add any assertions here to check the response
# print(response)
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
os.environ["AWS_REGION_NAME"] = aws_region_name
except RateLimitError:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
# os.environ["AWS_REGION_NAME"] = aws_region_name
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_claude_2_1_completion_auth()
# # test_completion_bedrock_claude_2_1_completion_auth()
def test_completion_bedrock_claude_external_client_auth():
print("\ncalling bedrock claude external client auth")
import os
# def test_completion_bedrock_claude_external_client_auth():
# print("\ncalling bedrock claude external client auth")
# import os
aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
# aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
# aws_region_name = os.environ["AWS_REGION_NAME"]
os.environ.pop("AWS_ACCESS_KEY_ID", None)
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
os.environ.pop("AWS_REGION_NAME", None)
# os.environ.pop("AWS_ACCESS_KEY_ID", None)
# os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
# os.environ.pop("AWS_REGION_NAME", None)
try:
import boto3
# try:
# import boto3
litellm.set_verbose = True
# litellm.set_verbose = True
bedrock = boto3.client(
service_name="bedrock-runtime",
region_name=aws_region_name,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
)
# bedrock = boto3.client(
# service_name="bedrock-runtime",
# region_name=aws_region_name,
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
# )
response = completion(
model="bedrock/anthropic.claude-instant-v1",
messages=messages,
max_tokens=10,
temperature=0.1,
aws_bedrock_client=bedrock,
)
# Add any assertions here to check the response
print(response)
# response = completion(
# model="bedrock/anthropic.claude-instant-v1",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# aws_bedrock_client=bedrock,
# )
# # Add any assertions here to check the response
# print(response)
os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
os.environ["AWS_REGION_NAME"] = aws_region_name
except RateLimitError:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
# os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
# os.environ["AWS_REGION_NAME"] = aws_region_name
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_claude_external_client_auth()
# # test_completion_bedrock_claude_external_client_auth()
def test_completion_bedrock_claude_sts_client_auth():
print("\ncalling bedrock claude external client auth")
import os
# @pytest.mark.skip(reason="Expired token, need to renew")
# def test_completion_bedrock_claude_sts_client_auth():
# print("\ncalling bedrock claude external client auth")
# import os
aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
aws_region_name = os.environ["AWS_REGION_NAME"]
aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
# aws_access_key_id = os.environ["AWS_TEMP_ACCESS_KEY_ID"]
# aws_secret_access_key = os.environ["AWS_TEMP_SECRET_ACCESS_KEY"]
# aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
try:
import boto3
# try:
# import boto3
litellm.set_verbose = True
# litellm.set_verbose = True
response = completion(
model="bedrock/anthropic.claude-instant-v1",
messages=messages,
max_tokens=10,
temperature=0.1,
aws_region_name=aws_region_name,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
)
# response = completion(
# model="bedrock/anthropic.claude-instant-v1",
# messages=messages,
# max_tokens=10,
# temperature=0.1,
# aws_region_name=aws_region_name,
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_role_name=aws_role_name,
# aws_session_name="my-test-session",
# )
response = embedding(
model="cohere.embed-multilingual-v3",
input=["hello world"],
aws_region_name="us-east-1",
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
)
# response = embedding(
# model="cohere.embed-multilingual-v3",
# input=["hello world"],
# aws_region_name="us-east-1",
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_role_name=aws_role_name,
# aws_session_name="my-test-session",
# )
response = completion(
model="gpt-3.5-turbo",
messages=messages,
aws_region_name="us-east-1",
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
)
# Add any assertions here to check the response
print(response)
except RateLimitError:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# response = completion(
# model="gpt-3.5-turbo",
# messages=messages,
# aws_region_name="us-east-1",
# aws_access_key_id=aws_access_key_id,
# aws_secret_access_key=aws_secret_access_key,
# aws_role_name=aws_role_name,
# aws_session_name="my-test-session",
# )
# # Add any assertions here to check the response
# print(response)
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
test_completion_bedrock_claude_sts_client_auth()
# # test_completion_bedrock_claude_sts_client_auth()
def test_provisioned_throughput():
try:
litellm.set_verbose = True
import botocore, json, io
import botocore.session
from botocore.stub import Stubber
# def test_provisioned_throughput():
# try:
# litellm.set_verbose = True
# import botocore, json, io
# import botocore.session
# from botocore.stub import Stubber
bedrock_client = botocore.session.get_session().create_client(
"bedrock-runtime", region_name="us-east-1"
)
# bedrock_client = botocore.session.get_session().create_client(
# "bedrock-runtime", region_name="us-east-1"
# )
expected_params = {
"accept": "application/json",
"body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
'"max_tokens_to_sample": 256}',
"contentType": "application/json",
"modelId": "provisioned-model-arn",
}
response_from_bedrock = {
"body": io.StringIO(
json.dumps(
{
"completion": " Here is a short poem about the sky:",
"stop_reason": "max_tokens",
"stop": None,
}
)
),
"contentType": "contentType",
"ResponseMetadata": {"HTTPStatusCode": 200},
}
# expected_params = {
# "accept": "application/json",
# "body": '{"prompt": "\\n\\nHuman: Hello, how are you?\\n\\nAssistant: ", '
# '"max_tokens_to_sample": 256}',
# "contentType": "application/json",
# "modelId": "provisioned-model-arn",
# }
# response_from_bedrock = {
# "body": io.StringIO(
# json.dumps(
# {
# "completion": " Here is a short poem about the sky:",
# "stop_reason": "max_tokens",
# "stop": None,
# }
# )
# ),
# "contentType": "contentType",
# "ResponseMetadata": {"HTTPStatusCode": 200},
# }
with Stubber(bedrock_client) as stubber:
stubber.add_response(
"invoke_model",
service_response=response_from_bedrock,
expected_params=expected_params,
)
response = litellm.completion(
model="bedrock/anthropic.claude-instant-v1",
model_id="provisioned-model-arn",
messages=[{"content": "Hello, how are you?", "role": "user"}],
aws_bedrock_client=bedrock_client,
)
print("response stubbed", response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# with Stubber(bedrock_client) as stubber:
# stubber.add_response(
# "invoke_model",
# service_response=response_from_bedrock,
# expected_params=expected_params,
# )
# response = litellm.completion(
# model="bedrock/anthropic.claude-instant-v1",
# model_id="provisioned-model-arn",
# messages=[{"content": "Hello, how are you?", "role": "user"}],
# aws_bedrock_client=bedrock_client,
# )
# print("response stubbed", response)
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
# test_provisioned_throughput()
# # test_provisioned_throughput()

View file

@ -546,6 +546,7 @@ def test_redis_cache_acompletion_stream():
# test_redis_cache_acompletion_stream()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_redis_cache_acompletion_stream_bedrock():
import asyncio
@ -571,7 +572,7 @@ def test_redis_cache_acompletion_stream_bedrock():
async def call1():
nonlocal response_1_content
response1 = await litellm.acompletion(
model="bedrock/anthropic.claude-v1",
model="bedrock/anthropic.claude-v2",
messages=messages,
max_tokens=40,
temperature=1,
@ -589,7 +590,7 @@ def test_redis_cache_acompletion_stream_bedrock():
async def call2():
nonlocal response_2_content
response2 = await litellm.acompletion(
model="bedrock/anthropic.claude-v1",
model="bedrock/anthropic.claude-v2",
messages=messages,
max_tokens=40,
temperature=1,
@ -615,6 +616,7 @@ def test_redis_cache_acompletion_stream_bedrock():
raise e
@pytest.mark.skip(reason="AWS Suspended Account")
def test_s3_cache_acompletion_stream_azure():
import asyncio
@ -697,6 +699,7 @@ def test_s3_cache_acompletion_stream_azure():
@pytest.mark.asyncio
@pytest.mark.skip(reason="AWS Suspended Account")
async def test_s3_cache_acompletion_azure():
import asyncio
import logging

View file

@ -1404,6 +1404,7 @@ def test_customprompt_together_ai():
# test_customprompt_together_ai()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker():
try:
litellm.set_verbose = True
@ -1429,6 +1430,7 @@ def test_completion_sagemaker():
# test_completion_sagemaker()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker_stream():
try:
litellm.set_verbose = False
@ -1459,6 +1461,7 @@ def test_completion_sagemaker_stream():
pytest.fail(f"Error occurred: {e}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_chat_sagemaker():
try:
messages = [{"role": "user", "content": "Hey, how's it going?"}]
@ -1483,6 +1486,7 @@ def test_completion_chat_sagemaker():
# test_completion_chat_sagemaker()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_chat_sagemaker_mistral():
try:
messages = [{"role": "user", "content": "Hey, how's it going?"}]
@ -1501,6 +1505,7 @@ def test_completion_chat_sagemaker_mistral():
# test_completion_chat_sagemaker_mistral()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_titan_null_response():
try:
response = completion(
@ -1526,6 +1531,7 @@ def test_completion_bedrock_titan_null_response():
pytest.fail(f"An error occurred - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_titan():
try:
response = completion(
@ -1547,6 +1553,7 @@ def test_completion_bedrock_titan():
# test_completion_bedrock_titan()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_claude():
print("calling claude")
try:
@ -1568,6 +1575,7 @@ def test_completion_bedrock_claude():
# test_completion_bedrock_claude()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_cohere():
print("calling bedrock cohere")
litellm.set_verbose = True
@ -1954,12 +1962,15 @@ def test_completion_gemini():
messages = [{"role": "user", "content": "Hey, how's it going?"}]
try:
response = completion(model=model_name, messages=messages)
# Add any assertions here to check the response
# Add any assertions here to check the response
print(response)
except litellm.APIError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
if "InternalServerError" in str(e):
pass
else:
pytest.fail(f"Error occurred: {e}")
# test_completion_gemini()
@ -1974,8 +1985,13 @@ async def test_acompletion_gemini():
response = await litellm.acompletion(model=model_name, messages=messages)
# Add any assertions here to check the response
print(f"response: {response}")
except litellm.APIError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
if "InternalServerError" in str(e):
pass
else:
pytest.fail(f"Error occurred: {e}")
# Palm tests

View file

@ -171,6 +171,7 @@ def test_cost_openai_image_gen():
assert cost == 0.019922944
@pytest.mark.skip(reason="AWS Suspended Account")
def test_cost_bedrock_pricing():
"""
- get pricing specific to region for a model
@ -226,6 +227,7 @@ def test_cost_bedrock_pricing():
assert cost == predicted_cost
@pytest.mark.skip(reason="AWS disabled our access")
def test_cost_bedrock_pricing_actual_calls():
litellm.set_verbose = True
model = "anthropic.claude-instant-v1"

View file

@ -80,16 +80,6 @@ model_list:
description: this is a test openai model
id: 9b1ef341-322c-410a-8992-903987fef439
model_name: test_openai_models
- litellm_params:
model: bedrock/amazon.titan-embed-text-v1
model_info:
mode: embedding
model_name: amazon-embeddings
- litellm_params:
model: sagemaker/berri-benchmarking-gpt-j-6b-fp16
model_info:
mode: embedding
model_name: GPT-J 6B - Sagemaker Text Embedding (Internal)
- litellm_params:
model: dall-e-3
model_info:

View file

@ -478,17 +478,18 @@ async def test_async_chat_azure_stream():
## Test Bedrock + sync
@pytest.mark.skip(reason="AWS Suspended Account")
def test_chat_bedrock_stream():
try:
customHandler = CompletionCustomHandler()
litellm.callbacks = [customHandler]
response = litellm.completion(
model="bedrock/anthropic.claude-v1",
model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
)
# test streaming
response = litellm.completion(
model="bedrock/anthropic.claude-v1",
model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
stream=True,
)
@ -497,7 +498,7 @@ def test_chat_bedrock_stream():
# test failure callback
try:
response = litellm.completion(
model="bedrock/anthropic.claude-v1",
model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm sync bedrock"}],
aws_region_name="my-bad-region",
stream=True,
@ -518,18 +519,19 @@ def test_chat_bedrock_stream():
## Test Bedrock + Async
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_async_chat_bedrock_stream():
try:
customHandler = CompletionCustomHandler()
litellm.callbacks = [customHandler]
response = await litellm.acompletion(
model="bedrock/anthropic.claude-v1",
model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
)
# test streaming
response = await litellm.acompletion(
model="bedrock/anthropic.claude-v1",
model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
stream=True,
)
@ -540,7 +542,7 @@ async def test_async_chat_bedrock_stream():
## test failure callback
try:
response = await litellm.acompletion(
model="bedrock/anthropic.claude-v1",
model="bedrock/anthropic.claude-v2",
messages=[{"role": "user", "content": "Hi 👋 - i'm async bedrock"}],
aws_region_name="my-bad-key",
stream=True,
@ -561,6 +563,7 @@ async def test_async_chat_bedrock_stream():
## Test Sagemaker + Async
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_async_chat_sagemaker_stream():
try:
@ -793,6 +796,7 @@ async def test_async_embedding_azure():
## Test Bedrock + Async
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_async_embedding_bedrock():
try:

View file

@ -388,6 +388,7 @@ async def test_async_custom_handler_embedding_optional_param():
# asyncio.run(test_async_custom_handler_embedding_optional_param())
@pytest.mark.skip(reason="AWS Account suspended. Pending their approval")
@pytest.mark.asyncio
async def test_async_custom_handler_embedding_optional_param_bedrock():
"""

View file

@ -67,6 +67,7 @@ def verify_log_file(log_file_path):
assert success_count == 3 # Expect 3 success logs from dynamoDB
@pytest.mark.skip(reason="AWS Suspended Account")
def test_dynamo_logging():
# all dynamodb requests need to be in one test function
# since we are modifying stdout, and pytests runs tests in parallel

View file

@ -256,6 +256,7 @@ async def test_vertexai_aembedding():
pytest.fail(f"Error occurred: {e}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_embedding_titan():
try:
# this tests if we support str input for bedrock embedding
@ -301,6 +302,7 @@ def test_bedrock_embedding_titan():
# test_bedrock_embedding_titan()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_embedding_cohere():
try:
litellm.set_verbose = False
@ -422,6 +424,7 @@ def test_aembedding_azure():
# test_aembedding_azure()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_sagemaker_embeddings():
try:
response = litellm.embedding(
@ -438,6 +441,7 @@ def test_sagemaker_embeddings():
pytest.fail(f"Error occurred: {e}")
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_sagemaker_aembeddings():
try:

View file

@ -42,6 +42,7 @@ exception_models = [
# Test 1: Context Window Errors
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.parametrize("model", exception_models)
def test_context_window(model):
print("Testing context window error")
@ -120,9 +121,9 @@ def invalid_auth(model): # set the model key to an invalid key, depending on th
os.environ["AI21_API_KEY"] = "bad-key"
elif "togethercomputer" in model:
temporary_key = os.environ["TOGETHERAI_API_KEY"]
os.environ[
"TOGETHERAI_API_KEY"
] = "84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a"
os.environ["TOGETHERAI_API_KEY"] = (
"84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a"
)
elif model in litellm.openrouter_models:
temporary_key = os.environ["OPENROUTER_API_KEY"]
os.environ["OPENROUTER_API_KEY"] = "bad-key"

View file

@ -87,6 +87,7 @@ async def test_azure_img_gen_health_check():
# asyncio.run(test_azure_img_gen_health_check())
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_sagemaker_embedding_health_check():
response = await litellm.ahealth_check(

View file

@ -121,6 +121,7 @@ async def test_async_image_generation_azure():
pytest.fail(f"An exception occurred - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_image_generation_bedrock():
try:
litellm.set_verbose = True
@ -141,6 +142,7 @@ def test_image_generation_bedrock():
pytest.fail(f"An exception occurred - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_aimage_generation_bedrock_with_optional_params():
try:

File diff suppressed because it is too large

View file

@ -80,6 +80,14 @@ request_data = {
@pytest.fixture
def prisma_client():
from litellm.proxy.proxy_cli import append_query_params
### add connection pool + pool timeout args
params = {"connection_limit": 100, "pool_timeout": 60}
database_url = os.getenv("DATABASE_URL")
modified_url = append_query_params(database_url, params)
os.environ["DATABASE_URL"] = modified_url
# Assuming DBClient is a class that needs to be instantiated
prisma_client = PrismaClient(
database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj
@ -1633,3 +1641,99 @@ async def test_key_with_no_permissions(prisma_client):
except Exception as e:
print("Got Exception", e)
print(e.message)
async def track_cost_callback_helper_fn(generated_key: str, user_id: str):
from litellm import ModelResponse, Choices, Message, Usage
from litellm.proxy.proxy_server import (
_PROXY_track_cost_callback as track_cost_callback,
)
import uuid
request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
resp = ModelResponse(
id=request_id,
choices=[
Choices(
finish_reason=None,
index=0,
message=Message(
content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
role="assistant",
),
)
],
model="gpt-35-turbo", # azure always has model written like this
usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
)
await track_cost_callback(
kwargs={
"call_type": "acompletion",
"model": "sagemaker-chatgpt-v-2",
"stream": True,
"complete_streaming_response": resp,
"litellm_params": {
"metadata": {
"user_api_key": hash_token(generated_key),
"user_api_key_user_id": user_id,
}
},
"response_cost": 0.00005,
},
completion_response=resp,
start_time=datetime.now(),
end_time=datetime.now(),
)
@pytest.mark.skip(reason="High traffic load test for spend tracking")
@pytest.mark.asyncio
async def test_proxy_load_test_db(prisma_client):
"""
Run 1500 req./s against track_cost_callback function
"""
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
from litellm._logging import verbose_proxy_logger
import logging, time
litellm.set_verbose = True
verbose_proxy_logger.setLevel(logging.DEBUG)
try:
start_time = time.time()
await litellm.proxy.proxy_server.prisma_client.connect()
request = GenerateKeyRequest(max_budget=0.00001)
key = await generate_key_fn(request)
print(key)
generated_key = key.key
user_id = key.user_id
bearer_token = "Bearer " + generated_key
request = Request(scope={"type": "http"})
request._url = URL(url="/chat/completions")
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token)
print("result from user auth with new key", result)
# update spend using track_cost callback, make 2nd request, it should fail
n = 5000
tasks = [
track_cost_callback_helper_fn(generated_key=generated_key, user_id=user_id)
for _ in range(n)
]
completions = await asyncio.gather(*tasks)
await asyncio.sleep(120)
try:
# call spend logs
spend_logs = await view_spend_logs(api_key=generated_key)
print(f"len responses: {len(spend_logs)}")
assert len(spend_logs) == n
print(n, time.time() - start_time, len(spend_logs))
except:
print(n, time.time() - start_time, 0)
raise Exception(f"it worked! key={key.key}")
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")

View file

@ -12,6 +12,7 @@ import litellm
from litellm import completion
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker():
litellm.set_verbose = True
litellm.drop_params = True

View file

@ -473,6 +473,7 @@ def aleph_alpha_test_completion():
# Sagemaker
@pytest.mark.skip(reason="AWS Suspended Account")
def sagemaker_test_completion():
litellm.SagemakerConfig(max_new_tokens=10)
# litellm.set_verbose=True
@ -514,6 +515,7 @@ def sagemaker_test_completion():
# Bedrock
@pytest.mark.skip(reason="AWS Suspended Account")
def bedrock_test_completion():
litellm.AmazonCohereConfig(max_tokens=10)
# litellm.set_verbose=True

View file

@ -125,6 +125,7 @@ def test_embedding(client_no_auth):
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_embedding(client_no_auth):
global headers
from litellm.proxy.proxy_server import user_custom_auth
@ -145,6 +146,7 @@ def test_bedrock_embedding(client_no_auth):
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
@pytest.mark.skip(reason="AWS Suspended Account")
def test_sagemaker_embedding(client_no_auth):
global headers
from litellm.proxy.proxy_server import user_custom_auth

View file

@ -61,6 +61,7 @@ def generate_random_word(length=4):
return "".join(random.choice(letters) for _ in range(length))
@pytest.mark.skip(reason="AWS Suspended Account")
def test_chat_completion(client_no_auth):
global headers
try:

View file

@ -166,14 +166,6 @@ def test_call_one_endpoint():
"tpm": 240000,
"rpm": 1800,
},
{
"model_name": "claude-v1",
"litellm_params": {
"model": "bedrock/anthropic.claude-instant-v1",
},
"tpm": 100000,
"rpm": 10000,
},
{
"model_name": "text-embedding-ada-002",
"litellm_params": {
@ -202,15 +194,6 @@ def test_call_one_endpoint():
)
print("\n response", response)
async def call_bedrock_claude():
response = await router.acompletion(
model="bedrock/anthropic.claude-instant-v1",
messages=[{"role": "user", "content": "hello this request will pass"}],
specific_deployment=True,
)
print("\n response", response)
async def call_azure_embedding():
response = await router.aembedding(
model="azure/azure-embedding-model",
@ -221,7 +204,6 @@ def test_call_one_endpoint():
print("\n response", response)
asyncio.run(call_azure_completion())
asyncio.run(call_bedrock_claude())
asyncio.run(call_azure_embedding())
os.environ["AZURE_API_BASE"] = old_api_base
@ -593,6 +575,7 @@ def test_azure_embedding_on_router():
# test_azure_embedding_on_router()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_bedrock_on_router():
litellm.set_verbose = True
print("\n Testing bedrock on router\n")

View file

@ -87,6 +87,7 @@ def test_router_timeouts():
print("********** TOKENS USED SO FAR = ", total_tokens_used)
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_router_timeouts_bedrock():
import openai

View file

@ -764,6 +764,7 @@ def test_completion_replicate_stream_bad_key():
# test_completion_replicate_stream_bad_key()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_claude_stream():
try:
litellm.set_verbose = False
@ -810,6 +811,7 @@ def test_completion_bedrock_claude_stream():
# test_completion_bedrock_claude_stream()
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_bedrock_ai21_stream():
try:
litellm.set_verbose = False
@ -911,6 +913,7 @@ def test_sagemaker_weird_response():
# test_sagemaker_weird_response()
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_sagemaker_streaming_async():
try:
@ -949,6 +952,7 @@ async def test_sagemaker_streaming_async():
# asyncio.run(test_sagemaker_streaming_async())
@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker_stream():
try:
response = completion(
@ -1075,8 +1079,6 @@ async def test_hf_completion_tgi_stream():
if finished:
break
idx += 1
if complete_response.strip() == "":
raise Exception("Empty response received")
print(f"completion_response: {complete_response}")
except litellm.ServiceUnavailableError as e:
pass

View file

@ -317,3 +317,24 @@ def test_token_counter():
# test_token_counter()
def test_supports_function_calling():
try:
assert litellm.supports_function_calling(model="gpt-3.5-turbo") == True
assert (
litellm.supports_function_calling(model="azure/gpt-4-1106-preview") == True
)
assert (
litellm.supports_function_calling(model="anthropic.claude-instant-v1")
== False
)
assert litellm.supports_function_calling(model="palm/chat-bison") == False
assert litellm.supports_function_calling(model="ollama/llama2") == False
assert litellm.supports_function_calling(model="claude-2") == False
except Exception as e:
pytest.fail(f"Error occurred: {e}")

View file

@ -205,18 +205,18 @@ def map_finish_reason(
class FunctionCall(OpenAIObject):
arguments: str
name: str
name: Optional[str] = None
class Function(OpenAIObject):
arguments: str
name: str
name: Optional[str] = None
class ChatCompletionDeltaToolCall(OpenAIObject):
id: str
id: Optional[str] = None
function: Function
type: str
type: Optional[str] = None
index: int
@ -275,13 +275,19 @@ class Delta(OpenAIObject):
super(Delta, self).__init__(**params)
self.content = content
self.role = role
self.function_call = function_call
if tool_calls is not None and isinstance(tool_calls, dict):
if function_call is not None and isinstance(function_call, dict):
self.function_call = FunctionCall(**function_call)
else:
self.function_call = function_call
if tool_calls is not None and isinstance(tool_calls, list):
self.tool_calls = []
for tool_call in tool_calls:
if tool_call.get("index", None) is None:
tool_call["index"] = 0
self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
if isinstance(tool_call, dict):
if tool_call.get("index", None) is None:
tool_call["index"] = 0
self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
elif isinstance(tool_call, ChatCompletionDeltaToolCall):
self.tool_calls.append(tool_call)
else:
self.tool_calls = tool_calls
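# Example (sketch): the coercion above lets a raw streaming chunk dict become typed objects:
#   d = Delta(tool_calls=[{"id": "call_1", "type": "function",
#                          "function": {"name": "get_weather", "arguments": ""}}])
#   d.tool_calls[0].index  # -> 0, filled in because the chunk omitted "index"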
@ -1636,7 +1642,7 @@ class Logging:
verbose_logger.debug(
"Async success callbacks: Got a complete streaming response"
)
self.model_call_details["complete_streaming_response"] = (
self.model_call_details["async_complete_streaming_response"] = (
complete_streaming_response
)
try:
@ -1684,28 +1690,31 @@ class Logging:
print_verbose("async success_callback: reaches cache for logging!")
kwargs = self.model_call_details
if self.stream:
if "complete_streaming_response" not in kwargs:
if "async_complete_streaming_response" not in kwargs:
print_verbose(
f"async success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
f"async success_callback: reaches cache for logging, there is no async_complete_streaming_response. Kwargs={kwargs}\n\n"
)
pass
else:
print_verbose(
"async success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
"async success_callback: reaches cache for logging, there is a async_complete_streaming_response. Adding to cache"
)
result = kwargs["complete_streaming_response"]
result = kwargs["async_complete_streaming_response"]
# only add to cache once we have a complete streaming response
litellm.cache.add_cache(result, **kwargs)
if isinstance(callback, CustomLogger): # custom logger class
print_verbose(
f"Async success callbacks: {callback}; self.stream: {self.stream}; complete_streaming_response: {self.model_call_details.get('complete_streaming_response', None)}"
f"Running Async success callback: {callback}; self.stream: {self.stream}; async_complete_streaming_response: {self.model_call_details.get('async_complete_streaming_response', None)} result={result}"
)
if self.stream == True:
if "complete_streaming_response" in self.model_call_details:
if (
"async_complete_streaming_response"
in self.model_call_details
):
await callback.async_log_success_event(
kwargs=self.model_call_details,
response_obj=self.model_call_details[
"complete_streaming_response"
"async_complete_streaming_response"
],
start_time=start_time,
end_time=end_time,
@ -1726,14 +1735,18 @@ class Logging:
)
if callable(callback): # custom logger functions
print_verbose(
f"Making async function logging call - {self.model_call_details}"
f"Making async function logging call for {callback}, result={result} - {self.model_call_details}"
)
if self.stream:
if "complete_streaming_response" in self.model_call_details:
if (
"async_complete_streaming_response"
in self.model_call_details
):
await customLogger.async_log_event(
kwargs=self.model_call_details,
response_obj=self.model_call_details[
"complete_streaming_response"
"async_complete_streaming_response"
],
start_time=start_time,
end_time=end_time,
@ -1754,14 +1767,17 @@ class Logging:
if dynamoLogger is None:
dynamoLogger = DyanmoDBLogger()
if self.stream:
if "complete_streaming_response" in self.model_call_details:
if (
"async_complete_streaming_response"
in self.model_call_details
):
print_verbose(
"DynamoDB Logger: Got Stream Event - Completed Stream Response"
)
await dynamoLogger._async_log_event(
kwargs=self.model_call_details,
response_obj=self.model_call_details[
"complete_streaming_response"
"async_complete_streaming_response"
],
start_time=start_time,
end_time=end_time,
@ -3715,6 +3731,54 @@ def completion_cost(
raise e
def supports_function_calling(model: str):
"""
Check if the given model supports function calling and return a boolean value.
Parameters:
model (str): The model name to be checked.
Returns:
bool: True if the model supports function calling, False otherwise.
Raises:
Exception: If the given model is not found in model_prices_and_context_window.json.
"""
if model in litellm.model_cost:
model_info = litellm.model_cost[model]
if model_info.get("supports_function_calling", False):
return True
return False
else:
raise Exception(
f"Model not in model_prices_and_context_window.json. You passed model={model}."
)
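# Usage sketch (model names assume the cost-map entries added in this commit;
# a model missing from the map raises instead of returning False):
#
#   supports_function_calling(model="gpt-4-0613")          # -> True
#   supports_function_calling(model="gpt-3.5-turbo-0301")  # -> False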
def supports_parallel_function_calling(model: str):
"""
Check if the given model supports parallel function calling and return True if it does, False otherwise.
Parameters:
model (str): The model to check for support of parallel function calling.
Returns:
bool: True if the model supports parallel function calling, False otherwise.
Raises:
Exception: If the model is not found in the model_cost dictionary.
"""
if model in litellm.model_cost:
model_info = litellm.model_cost[model]
if model_info.get("supports_parallel_function_calling", False):
return True
return False
else:
raise Exception(
f"Model not in model_prices_and_context_window.json. You passed model={model}."
)
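# Usage sketch (per the supports_parallel_function_calling flags added to the
# cost map in this commit):
#
#   supports_parallel_function_calling(model="gpt-3.5-turbo-1106")  # -> True
#   supports_parallel_function_calling(model="gpt-4-0613")          # -> False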
####### HELPER FUNCTIONS ################
def register_model(model_cost: Union[str, dict]):
"""
@ -4043,6 +4107,7 @@ def get_optional_params(
and custom_llm_provider != "vertex_ai"
and custom_llm_provider != "anyscale"
and custom_llm_provider != "together_ai"
and custom_llm_provider != "mistral"
):
if custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat":
# ollama actually supports json output
@ -4713,7 +4778,14 @@ def get_optional_params(
if max_tokens:
optional_params["max_tokens"] = max_tokens
elif custom_llm_provider == "mistral":
supported_params = ["temperature", "top_p", "stream", "max_tokens"]
supported_params = [
"temperature",
"top_p",
"stream",
"max_tokens",
"tools",
"tool_choice",
]
_check_valid_arg(supported_params=supported_params)
if temperature is not None:
optional_params["temperature"] = temperature
@ -4723,6 +4795,10 @@ def get_optional_params(
optional_params["stream"] = stream
if max_tokens is not None:
optional_params["max_tokens"] = max_tokens
if tools is not None:
optional_params["tools"] = tools
if tool_choice is not None:
optional_params["tool_choice"] = tool_choice
# check safe_mode, random_seed: https://docs.mistral.ai/api/#operation/createChatCompletion
safe_mode = passed_params.pop("safe_mode", None)
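With tools and tool_choice now forwarded for Mistral, a call along these lines should pass them through unchanged. This is a sketch; the tool schema is illustrative (OpenAI-style function tools, the format litellm accepts):

```python
import litellm

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",  # hypothetical tool
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City name"}
                },
                "required": ["location"],
            },
        },
    }
]

response = litellm.completion(
    model="mistral/mistral-large-latest",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    tool_choice="auto",
)
```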
@ -6947,7 +7023,7 @@ def exception_type(
if "500 An internal error has occurred." in error_str:
exception_mapping_worked = True
raise APIError(
status_code=original_exception.status_code,
status_code=getattr(original_exception, "status_code", 500),
message=f"PalmException - {original_exception.message}",
llm_provider="palm",
model=model,
@ -8730,7 +8806,7 @@ class CustomStreamWrapper:
or original_chunk.choices[0].delta.tool_calls is not None
):
try:
delta = dict(original_chunk.choices[0].delta)
delta = original_chunk.choices[0].delta
model_response.system_fingerprint = (
original_chunk.system_fingerprint
)
@ -8765,7 +8841,9 @@ class CustomStreamWrapper:
is None
):
t.function.arguments = ""
model_response.choices[0].delta = Delta(**delta)
_json_delta = delta.model_dump()
print_verbose(f"_json_delta: {_json_delta}")
model_response.choices[0].delta = Delta(**_json_delta)
except Exception as e:
traceback.print_exc()
model_response.choices[0].delta = Delta()
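The switch from dict(delta) to delta.model_dump() matters because the chunk delta is a pydantic v2 model: dict() leaves nested models (e.g. tool calls) as model instances, while model_dump() recursively converts them to plain dicts that Delta(**...) can consume. A standalone sketch of the difference:

```python
from pydantic import BaseModel

class Function(BaseModel):
    name: str
    arguments: str

class ToolCall(BaseModel):
    function: Function

tc = ToolCall(function=Function(name="get_weather", arguments="{}"))

print(dict(tc))         # {'function': Function(name='get_weather', arguments='{}')} - nested model survives
print(tc.model_dump())  # {'function': {'name': 'get_weather', 'arguments': '{}'}} - plain nested dicts
```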

View file

@ -6,7 +6,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gpt-4-turbo-preview": {
"max_tokens": 8192,
@ -15,7 +16,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-4-0314": {
"max_tokens": 8192,
@ -33,7 +36,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gpt-4-32k": {
"max_tokens": 32768,
@ -69,7 +73,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-4-0125-preview": {
"max_tokens": 128000,
@ -78,7 +84,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-4-vision-preview": {
"max_tokens": 128000,
@ -105,7 +113,8 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gpt-3.5-turbo-0301": {
"max_tokens": 4097,
@ -123,7 +132,8 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gpt-3.5-turbo-1106": {
"max_tokens": 16385,
@ -132,7 +142,9 @@
"input_cost_per_token": 0.0000010,
"output_cost_per_token": 0.0000020,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-3.5-turbo-0125": {
"max_tokens": 16385,
@ -141,7 +153,9 @@
"input_cost_per_token": 0.0000005,
"output_cost_per_token": 0.0000015,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"gpt-3.5-turbo-16k": {
"max_tokens": 16385,
@ -286,7 +300,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-4-1106-preview": {
"max_tokens": 128000,
@ -295,7 +311,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-4-0613": {
"max_tokens": 8192,
@ -304,7 +322,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"azure/gpt-4-32k-0613": {
"max_tokens": 32768,
@ -331,7 +350,8 @@
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"azure/gpt-4-turbo": {
"max_tokens": 128000,
@ -340,7 +360,9 @@
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-4-turbo-vision-preview": {
"max_tokens": 128000,
@ -358,7 +380,8 @@
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"azure/gpt-35-turbo-1106": {
"max_tokens": 16384,
@ -367,7 +390,20 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-35-turbo-0125": {
"max_tokens": 16384,
"max_input_tokens": 16384,
"max_output_tokens": 4096,
"input_cost_per_token": 0.0000005,
"output_cost_per_token": 0.0000015,
"litellm_provider": "azure",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"azure/gpt-35-turbo-16k": {
"max_tokens": 16385,
@ -385,7 +421,8 @@
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "azure",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"azure/ada": {
"max_tokens": 8191,
@ -514,11 +551,12 @@
"mode": "chat"
},
"mistral/mistral-large-latest": {
"max_tokens": 8192,
"max_tokens": 32000,
"input_cost_per_token": 0.000008,
"output_cost_per_token": 0.000024,
"litellm_provider": "mistral",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"mistral/mistral-embed": {
"max_tokens": 8192,
@ -676,7 +714,8 @@
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.0000005,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"gemini-1.5-pro": {
"max_tokens": 8192,
@ -1738,6 +1777,23 @@
"output_cost_per_token": 0.0000009,
"litellm_provider": "together_ai"
},
"together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": {
"input_cost_per_token": 0.0000006,
"output_cost_per_token": 0.0000006,
"litellm_provider": "together_ai",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"together_ai/mistralai/Mistral-7B-Instruct-v0.1": {
"litellm_provider": "together_ai",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"together_ai/togethercomputer/CodeLlama-34b-Instruct": {
"litellm_provider": "together_ai",
"supports_function_calling": true,
"supports_parallel_function_calling": true
},
"ollama/llama2": {
"max_tokens": 4096,
"input_cost_per_token": 0.0,
@ -1990,7 +2046,16 @@
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "anyscale",
"mode": "chat"
"mode": "chat",
"supports_function_calling": true
},
"anyscale/Mixtral-8x7B-Instruct-v0.1": {
"max_tokens": 16384,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "anyscale",
"mode": "chat",
"supports_function_calling": true
},
"anyscale/HuggingFaceH4/zephyr-7b-beta": {
"max_tokens": 16384,

View file

@ -40,6 +40,8 @@ litellm_settings:
budget_duration: 30d
general_settings:
master_key: sk-1234 # [OPTIONAL] Only use this if you to require all calls to contain this key (Authorization: Bearer sk-1234)
proxy_budget_rescheduler_min_time: 30
proxy_budget_rescheduler_max_time: 60
# database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>" # [OPTIONAL] use for token-based auth to proxy
environment_variables:

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.27.12"
version = "1.28.0"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.27.12"
version = "1.28.0"
version_files = [
"pyproject.toml:^version"
]

View file

@ -10,6 +10,7 @@ gunicorn==21.2.0 # server dep
boto3==1.34.34 # aws bedrock/sagemaker calls
redis==5.0.0 # caching
numpy==1.24.3 # semantic caching
pandas==2.1.1 # for viewing clickhouse spend analytics
prisma==0.11.0 # for db
mangum==0.17.0 # for aws lambda functions
google-generativeai==0.3.2 # for vertex ai calls

View file

@ -449,7 +449,7 @@ async def test_key_with_budgets():
reset_at_init_value = key_info["info"]["budget_reset_at"]
reset_at_new_value = None
i = 0
await asyncio.sleep(610)
await asyncio.sleep(120)
while i < 3:
key_info = await get_key_info(session=session, get_key=key, call_key=key)
reset_at_new_value = key_info["info"]["budget_reset_at"]
@ -490,6 +490,7 @@ async def test_key_crossing_budget():
assert "ExceededTokenBudget: Current spend for token:" in str(e)
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_key_info_spend_values_sagemaker():
"""

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a697f24d60c9c262.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a697f24d60c9c262.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/6920a121699cde9c.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[24143,[\"303\",\"static/chunks/303-d80f23087a9e6aec.js\",\"931\",\"static/chunks/app/page-d4fe4a48cbd3572c.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/6920a121699cde9c.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"eSwVwl_InIrhYtCAqDMKF\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-12184ee6a95c1363.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-12184ee6a95c1363.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/a40ad0909dd7838e.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[30280,[\"303\",\"static/chunks/303-d80f23087a9e6aec.js\",\"931\",\"static/chunks/app/page-8f65fc157f538dff.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/a40ad0909dd7838e.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"kyOCJPBB9pyUfbMKCAXr-\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[24143,["303","static/chunks/303-d80f23087a9e6aec.js","931","static/chunks/app/page-d4fe4a48cbd3572c.js"],""]
3:I[30280,["303","static/chunks/303-d80f23087a9e6aec.js","931","static/chunks/app/page-8f65fc157f538dff.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["eSwVwl_InIrhYtCAqDMKF",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/6920a121699cde9c.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["kyOCJPBB9pyUfbMKCAXr-",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/a40ad0909dd7838e.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -6,6 +6,7 @@ import UserDashboard from "../components/user_dashboard";
import ModelDashboard from "@/components/model_dashboard";
import ViewUserDashboard from "@/components/view_users";
import Teams from "@/components/teams";
import AdminPanel from "@/components/admins";
import ChatUI from "@/components/chat_ui";
import Sidebar from "../components/leftnav";
import Usage from "../components/usage";
@ -73,6 +74,10 @@ const CreateKeyPage = () => {
return "App Owner";
case "app_admin":
return "Admin";
case "proxy_admin":
return "Admin";
case "proxy_admin_viewer":
return "Admin Viewer";
case "app_user":
return "App User";
default:
@ -133,6 +138,12 @@ const CreateKeyPage = () => {
searchParams={searchParams}
accessToken={accessToken}
/>
) : page == "admin-panel" ? (
<AdminPanel
setTeams={setTeams}
searchParams={searchParams}
accessToken={accessToken}
/>
) : (
<Usage
userID={userID}

View file

@ -0,0 +1,228 @@
/**
* Allow proxy admin to add other people to view global spend
* Use this to avoid sharing master key with others
*/
import React, { useState, useEffect } from "react";
import { Typography } from "antd";
import {
Button as Button2,
Modal,
Form,
Input,
Select as Select2,
InputNumber,
message,
} from "antd";
import { Select, SelectItem } from "@tremor/react";
import {
Table,
TableBody,
TableCell,
TableHead,
TableHeaderCell,
TableRow,
Card,
Icon,
Button,
Col,
Text,
Grid,
} from "@tremor/react";
import { CogIcon } from "@heroicons/react/outline";
interface AdminPanelProps {
searchParams: any;
accessToken: string | null;
setTeams: React.Dispatch<React.SetStateAction<Object[] | null>>;
}
import {
userUpdateUserCall,
Member,
userGetAllUsersCall,
User,
} from "./networking";
const AdminPanel: React.FC<AdminPanelProps> = ({
searchParams,
accessToken,
}) => {
const [form] = Form.useForm();
const [memberForm] = Form.useForm();
const { Title, Paragraph } = Typography;
const [value, setValue] = useState("");
const [admins, setAdmins] = useState<null | any[]>(null);
const [isAddMemberModalVisible, setIsAddMemberModalVisible] = useState(false);
useEffect(() => {
// Fetch proxy admins and admin viewers to populate the table
const fetchProxyAdminInfo = async () => {
if (accessToken != null) {
const combinedList: any[] = [];
const proxyViewers = await userGetAllUsersCall(
accessToken,
"proxy_admin_viewer"
);
proxyViewers.forEach((viewer: User) => {
combinedList.push({
user_role: viewer.user_role,
user_id: viewer.user_id,
user_email: viewer.user_email,
});
});
console.log(`proxy viewers: ${proxyViewers}`);
const proxyAdmins = await userGetAllUsersCall(
accessToken,
"proxy_admin"
);
proxyAdmins.forEach((admin: User) => {
combinedList.push({
user_role: admin.user_role,
user_id: admin.user_id,
user_email: admin.user_email,
});
});
console.log(`proxy admins: ${proxyAdmins}`);
console.log(`combinedList: ${combinedList}`);
setAdmins(combinedList);
}
};
fetchProxyAdminInfo();
}, [accessToken]);
const handleMemberOk = () => {
setIsAddMemberModalVisible(false);
memberForm.resetFields();
};
const handleMemberCancel = () => {
setIsAddMemberModalVisible(false);
memberForm.resetFields();
};
const handleMemberCreate = async (formValues: Record<string, any>) => {
try {
if (accessToken != null && admins != null) {
message.info("Making API Call");
const user_role: Member = {
role: "user",
user_email: formValues.user_email,
user_id: formValues.user_id,
};
const response: any = await userUpdateUserCall(accessToken, formValues);
console.log(`response for user update call: ${response}`);
// Check if the user already exists in the list; update or add accordingly
const foundIndex = admins.findIndex((user) => {
console.log(
`user.user_id=${user.user_id}; response.user_id=${response.user_id}`
);
return user.user_id === response.user_id;
});
console.log(`foundIndex: ${foundIndex}`);
if (foundIndex == -1) {
console.log(`adding new admin viewer to the list`);
// User not in the list yet - add them (new array reference so React re-renders)
setAdmins([...admins, response]);
}
setIsAddMemberModalVisible(false);
}
} catch (error) {
console.error("Error creating the key:", error);
}
};
console.log(`admins: ${admins?.length}`);
return (
<div className="w-full m-2">
<Title level={4}>Proxy Admins</Title>
<Paragraph>
Add people who can only view global spend. They cannot create teams or
</Paragraph>
<Grid numItems={1} className="gap-2 p-0 w-full">
<Col numColSpan={1}>
<Card className="w-full mx-auto flex-auto overflow-y-auto max-h-[50vh]">
<Table>
<TableHead>
<TableRow>
<TableHeaderCell>Member Name</TableHeaderCell>
<TableHeaderCell>Role</TableHeaderCell>
<TableHeaderCell>Action</TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
{admins
? admins.map((member: any, index: number) => (
<TableRow key={index}>
<TableCell>
{member["user_email"]
? member["user_email"]
: member["user_id"]
? member["user_id"]
: null}
</TableCell>
<TableCell>{member["user_role"]}</TableCell>
<TableCell>
<Icon icon={CogIcon} size="sm" />
</TableCell>
</TableRow>
))
: null}
</TableBody>
</Table>
</Card>
</Col>
<Col numColSpan={1}>
<Button
className="mx-auto mb-5"
onClick={() => setIsAddMemberModalVisible(true)}
>
+ Add viewer
</Button>
<Modal
title="Add viewer"
visible={isAddMemberModalVisible}
width={800}
footer={null}
onOk={handleMemberOk}
onCancel={handleMemberCancel}
>
<Form
form={memberForm}
onFinish={handleMemberCreate}
labelCol={{ span: 8 }}
wrapperCol={{ span: 16 }}
labelAlign="left"
>
<>
<Form.Item label="Email" name="user_email" className="mb-4">
<Input
name="user_email"
className="px-3 py-2 border rounded-md w-full"
/>
</Form.Item>
<div className="text-center mb-4">OR</div>
<Form.Item label="User ID" name="user_id" className="mb-4">
<Input
name="user_id"
className="px-3 py-2 border rounded-md w-full"
/>
</Form.Item>
</>
<div style={{ textAlign: "right", marginTop: "10px" }}>
<Button2 htmlType="submit">Add member</Button2>
</div>
</Form>
</Modal>
</Col>
</Grid>
</div>
);
};
export default AdminPanel;

View file

@ -46,6 +46,11 @@ const Sidebar: React.FC<SidebarProps> = ({
Teams
</Menu.Item>
) : null}
{userRole == "Admin" ? (
<Menu.Item key="7" onClick={() => setPage("admin-panel")}>
Admin
</Menu.Item>
) : null}
</Menu>
</Sider>
</Layout>

View file

@ -313,6 +313,7 @@ export const userSpendLogsCall = async (
endTime: String
) => {
try {
console.log(`user role in spend logs call: ${userRole}`);
let url = proxyBaseUrl ? `${proxyBaseUrl}/spend/logs` : `/spend/logs`;
if (userRole == "App Owner") {
url = `${url}/?user_id=${userID}&start_date=${startTime}&end_date=${endTime}`;
@ -343,6 +344,96 @@ export const userSpendLogsCall = async (
}
};
export const adminSpendLogsCall = async (accessToken: String) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/global/spend/logs`
: `/global/spend/logs`;
message.info("Making spend logs request");
const response = await fetch(url, {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
const errorData = await response.text();
message.error(errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log(data);
message.success("Spend Logs received");
return data;
} catch (error) {
console.error("Failed to create key:", error);
throw error;
}
};
export const adminTopKeysCall = async (accessToken: String) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/global/spend/keys?limit=5`
: `/global/spend/keys?limit=5`;
message.info("Making spend keys request");
const response = await fetch(url, {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
const errorData = await response.text();
message.error(errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log(data);
message.success("Spend Logs received");
return data;
} catch (error) {
console.error("Failed to create key:", error);
throw error;
}
};
export const adminTopModelsCall = async (accessToken: String) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/global/spend/models?limit=5`
: `/global/spend/models?limit=5`;
message.info("Making spend models request");
const response = await fetch(url, {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
const errorData = await response.text();
message.error(errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log(data);
message.success("Spend Logs received");
return data;
} catch (error) {
console.error("Failed to create key:", error);
throw error;
}
};
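These three dashboard calls map to plain GET endpoints on the proxy, so they can be exercised directly. A sketch, assuming the proxy runs locally on port 4000 with the sample master key from the config above:

```python
import requests

BASE = "http://0.0.0.0:4000"  # assumed local proxy address
HEADERS = {"Authorization": "Bearer sk-1234"}  # master key from the sample config

spend_logs = requests.get(f"{BASE}/global/spend/logs", headers=HEADERS).json()
top_keys = requests.get(f"{BASE}/global/spend/keys", params={"limit": 5}, headers=HEADERS).json()
top_models = requests.get(f"{BASE}/global/spend/models", params={"limit": 5}, headers=HEADERS).json()
```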
export const keyInfoCall = async (accessToken: String, keys: String[]) => {
try {
let url = proxyBaseUrl ? `${proxyBaseUrl}/v2/key/info` : `/v2/key/info`;
@ -468,6 +559,46 @@ export const userGetRequesedtModelsCall = async (accessToken: String) => {
}
};
export interface User {
user_role: string;
user_id: string;
user_email: string;
[key: string]: string; // Include any other potential keys in the dictionary
}
export const userGetAllUsersCall = async (
accessToken: String,
role: String
) => {
try {
const url = proxyBaseUrl
? `${proxyBaseUrl}/user/get_users?role=${role}`
: `/user/get_users?role=${role}`;
console.log("in userGetAllUsersCall:", url);
const response = await fetch(url, {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
const errorData = await response.text();
message.error("Failed to delete key: " + errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log(data);
message.success("Got all users");
return data;
// Handle success - you might want to update some state or UI based on the fetched users
} catch (error) {
console.error("Failed to get all users:", error);
throw error;
}
};
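userGetAllUsersCall wraps /user/get_users?role=...; the same query can be issued outside the dashboard. A sketch, with the same local-proxy assumptions as above:

```python
import requests

resp = requests.get(
    "http://0.0.0.0:4000/user/get_users",  # assumed local proxy address
    params={"role": "proxy_admin_viewer"},
    headers={"Authorization": "Bearer sk-1234"},  # master key from the sample config
)
resp.raise_for_status()
print(resp.json())
```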
export const teamCreateCall = async (
accessToken: string,
formValues: Record<string, any> // Assuming formValues is an object
@ -549,3 +680,41 @@ export const teamMemberAddCall = async (
throw error;
}
};
export const userUpdateUserCall = async (
accessToken: string,
formValues: any // Assuming formValues is an object
) => {
try {
console.log("Form Values in userUpdateUserCall:", formValues); // Log the form values before making the API call
const url = proxyBaseUrl ? `${proxyBaseUrl}/user/update` : `/user/update`;
const response = await fetch(url, {
method: "POST",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
user_role: "proxy_admin_viewer",
...formValues, // Include formValues in the request body
}),
});
if (!response.ok) {
const errorData = await response.text();
message.error("Failed to create key: " + errorData);
console.error("Error response from the server:", errorData);
throw new Error("Network response was not ok");
}
const data = await response.json();
console.log("API Response:", data);
message.success("User role updated");
return data;
// Handle success - you might want to update some state or UI based on the updated user
} catch (error) {
console.error("Failed to update user:", error);
throw error;
}
};
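userUpdateUserCall POSTs to /user/update and defaults user_role to proxy_admin_viewer unless the form overrides it. A direct sketch under the same assumptions; the email is a placeholder:

```python
import requests

resp = requests.post(
    "http://0.0.0.0:4000/user/update",  # assumed local proxy address
    headers={"Authorization": "Bearer sk-1234"},  # master key from the sample config
    json={"user_role": "proxy_admin_viewer", "user_email": "viewer@example.com"},
)
print(resp.json())
```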

View file

@ -2,7 +2,13 @@ import { BarChart, Card, Title } from "@tremor/react";
import React, { useState, useEffect } from "react";
import { Grid, Col, Text, LineChart } from "@tremor/react";
import { userSpendLogsCall, keyInfoCall } from "./networking";
import {
userSpendLogsCall,
keyInfoCall,
adminSpendLogsCall,
adminTopKeysCall,
adminTopModelsCall,
} from "./networking";
import { start } from "repl";
interface UsagePageProps {
@ -164,29 +170,61 @@ const UsagePage: React.FC<UsagePageProps> = ({
if (accessToken && token && userRole && userID) {
const fetchData = async () => {
try {
await userSpendLogsCall(
accessToken,
token,
userRole,
userID,
startTime,
endTime
).then(async (response) => {
const topKeysResponse = await keyInfoCall(
accessToken,
getTopKeys(response)
);
const filtered_keys = topKeysResponse["info"].map((k: any) => ({
key: (k["key_name"] || k["key_alias"] || k["token"]).substring(
/**
* If user is Admin - query the global views endpoints
* If user is App Owner - use the normal spend logs call
*/
console.log(`user role: ${userRole}`);
if (userRole == "Admin") {
const overall_spend = await adminSpendLogsCall(accessToken);
setKeySpendData(overall_spend);
const top_keys = await adminTopKeysCall(accessToken);
const filtered_keys = top_keys.map((k: any) => ({
key: (k["key_name"] || k["key_alias"] || k["api_key"]).substring(
0,
7
),
spend: k["spend"],
spend: k["total_spend"],
}));
setTopKeys(filtered_keys);
setTopUsers(getTopUsers(response));
setKeySpendData(response);
});
const top_models = await adminTopModelsCall(accessToken);
} else if (userRole == "App Owner") {
await userSpendLogsCall(
accessToken,
token,
userRole,
userID,
startTime,
endTime
).then(async (response) => {
console.log("result from spend logs call", response);
if ("daily_spend" in response) {
// this response shape comes from the clickhouse analytics endpoint
let daily_spend = response["daily_spend"];
console.log("daily spend", daily_spend);
setKeySpendData(daily_spend);
let topApiKeys = response.top_api_keys;
setTopKeys(topApiKeys);
} else {
const topKeysResponse = await keyInfoCall(
accessToken,
getTopKeys(response)
);
const filtered_keys = topKeysResponse["info"].map((k: any) => ({
key: (
k["key_name"] ||
k["key_alias"] ||
k["token"]
).substring(0, 7),
spend: k["spend"],
}));
setTopKeys(filtered_keys);
setTopUsers(getTopUsers(response));
setKeySpendData(response);
}
});
}
} catch (error) {
console.error("There was an error fetching the data", error);
// Optionally, update your UI to reflect the error state here as well
@ -210,7 +248,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
valueFormatter={valueFormatter}
yAxisWidth={100}
tickGap={5}
customTooltip={customTooltip}
// customTooltip={customTooltip}
/>
</Card>
</Col>