Merge branch 'main' into litellm_spend_logging_high_traffic

2025-04-27 11:43:54 +00:00 · 2024-02-06 23:36:58 -08:00 · 2024-02-06 23:36:58 -08:00 · f5eca2f4ad
commit f5eca2f4ad
parent c7b3c3d43f 1719f2fabc
36 changed files with 638 additions and 291 deletions
--- a/3
+++ b/3
@ -32,6 +32,9 @@ RUN pip install dist/*.whl
 # install dependencies as wheels
 RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt

+# install semantic-cache [Experimental] - installed here with --no-deps, and not in requirements.txt, because redisvl pins to pydantic 1.0
+RUN pip install redisvl==0.0.7 --no-deps
+
 # Runtime stage
 FROM $LITELLM_RUNTIME_IMAGE as runtime

--- a/Dockerfile.database
+++ b/Dockerfile.database
@ -47,6 +47,9 @@ COPY --from=builder /wheels/ /wheels/
 # Install the built wheel using pip; again using a wildcard if it's the only file
 RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels

+# install semantic-cache [Experimental] - installed here with --no-deps, and not in requirements.txt, because redisvl pins to pydantic 1.0
+RUN pip install redisvl==0.0.7 --no-deps
+
 # Generate prisma client
 RUN prisma generate
 RUN chmod +x entrypoint.sh
--- a/docs/my-website/docs/proxy/caching.md
+++ b/docs/my-website/docs/proxy/caching.md
@ -17,7 +17,7 @@ LiteLLM supports:

 Caching can be enabled by adding the `cache` key in the `config.yaml`

-### Step 1: Add `cache` to the config.yaml
+#### Step 1: Add `cache` to the config.yaml
 ```yaml
 model_list:
  - model_name: gpt-3.5-turbo
@ -32,7 +32,7 @@ litellm_settings:
  cache: True          # set cache responses to True, litellm defaults to using a redis cache
 ```

-### Step 2: Add Redis Credentials to .env
+#### Step 2: Add Redis Credentials to .env
 Set either `REDIS_URL` or the `REDIS_HOST` in your os environment, to enable caching.

  ```shell
@ -50,7 +50,7 @@ REDIS_<redis-kwarg-name> = ""
 ``` 

 [**See how it's read from the environment**](https://github.com/BerriAI/litellm/blob/4d7ff1b33b9991dcf38d821266290631d9bcd2dd/litellm/_redis.py#L40)
-### Step 3: Run proxy with config
+#### Step 3: Run proxy with config
 ```shell
 $ litellm --config /path/to/config.yaml
 ```
@ -58,7 +58,7 @@ $ litellm --config /path/to/config.yaml

 <TabItem value="s3" label="s3 cache">

-### Step 1: Add `cache` to the config.yaml
+#### Step 1: Add `cache` to the config.yaml
 ```yaml
 model_list:
  - model_name: gpt-3.5-turbo
@ -80,7 +80,7 @@ litellm_settings:
    s3_endpoint_url: https://s3.amazonaws.com  # [OPTIONAL] S3 endpoint URL, if you want to use Backblaze/cloudflare s3 buckets
 ```

-### Step 2: Run proxy with config
+#### Step 2: Run proxy with config
 ```shell
 $ litellm --config /path/to/config.yaml
 ```
@ -91,7 +91,7 @@ $ litellm --config /path/to/config.yaml

 Caching can be enabled by adding the `cache` key in the `config.yaml`

-### Step 1: Add `cache` to the config.yaml
+#### Step 1: Add `cache` to the config.yaml
 ```yaml
 model_list:
  - model_name: gpt-3.5-turbo
@ -113,7 +113,7 @@ litellm_settings:
    redis_semantic_cache_embedding_model: azure-embedding-model # set this to a model_name set in model_list
 ```

-### Step 2: Add Redis Credentials to .env
+#### Step 2: Add Redis Credentials to .env
 Set either `REDIS_URL` or the `REDIS_HOST` in your os environment, to enable caching.

  ```shell
@ -130,7 +130,7 @@ You can pass in any additional redis.Redis arg, by storing the variable + value
 REDIS_<redis-kwarg-name> = ""
 ``` 

-### Step 3: Run proxy with config
+#### Step 3: Run proxy with config
 ```shell
 $ litellm --config /path/to/config.yaml
 ```
--- a/docs/my-website/docs/proxy/users.md
+++ b/docs/my-website/docs/proxy/users.md
@ -13,6 +13,7 @@ Requirements:
 You can set budgets at 3 levels: 
 - For the proxy 
 - For a user 
+- For a 'user' passed to `/chat/completions`, `/embeddings` etc
 - For a key


@ -117,6 +118,61 @@ curl --location 'http://0.0.0.0:8000/key/generate' \
 --data '{"models": ["azure-models"], "user_id": "krrish3@berri.ai"}'
 ```

+</TabItem>
+<TabItem value="per-user-chat" label="For 'user' passed to /chat/completions">
+
+Use this to set a budget for the `user` passed to `/chat/completions`, **without needing to create a key for every user**
+
+**Step 1. Modify config.yaml**
+Set `max_user_budget` under `litellm_settings` (exposed in code as `litellm.max_user_budget`)
+```yaml
+general_settings:
+  master_key: sk-1234
+
+litellm_settings:
+  max_budget: 10      # global budget for proxy 
+  max_user_budget: 0.0001 # budget for 'user' passed to /chat/completions
+```
+
+**Step 2. Make a /chat/completions call, pass 'user' - first call works**
+```shell
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+        --header 'Content-Type: application/json' \
+        --header 'Authorization: Bearer sk-zi5onDRdHGD24v0Zdn7VBA' \
+        --data ' {
+        "model": "azure-gpt-3.5",
+        "user": "ishaan3",
+        "messages": [
+            {
+            "role": "user",
+            "content": "what time is it"
+            }
+        ]
+        }'
+```
+
+**Step 3. Make a /chat/completions call, pass 'user' - call fails, since 'ishaan3' is over budget**
+```shell
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+        --header 'Content-Type: application/json' \
+        --header 'Authorization: Bearer sk-zi5onDRdHGD24v0Zdn7VBA' \
+        --data ' {
+        "model": "azure-gpt-3.5",
+        "user": "ishaan3",
+        "messages": [
+            {
+            "role": "user",
+            "content": "what time is it"
+            }
+        ]
+        }'
+```
+
+Error
+```shell
+{"error":{"message":"Authentication Error, ExceededBudget: User ishaan3 has exceeded their budget. Current spend: 0.0008869999999999999; Max Budget: 0.0001","type":"auth_error","param":"None","code":401}}
+```
+
 </TabItem>
 <TabItem value="per-key" label="For Key">

--- a/litellm/init.py
+++ b/litellm/init.py
@ -148,6 +148,7 @@ s3_callback_params: Optional[Dict] = None
 default_key_generate_params: Optional[Dict] = None
 upperbound_key_generate_params: Optional[Dict] = None
 default_team_settings: Optional[List] = None
+max_user_budget: Optional[float] = None
 #### RELIABILITY ####
 request_timeout: Optional[float] = 6000
 num_retries: Optional[int] = None  # per model endpoint
--- a/litellm/llms/ollama_chat.py
+++ b/litellm/llms/ollama_chat.py
@ -230,7 +230,7 @@ def get_ollama_response(
    model_response["model"] = "ollama/" + model
    prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=messages))  # type: ignore
    completion_tokens = response_json.get(
-        "eval_count", litellm.token_counter(text=response_json["message"])
+        "eval_count", litellm.token_counter(text=response_json["message"]["content"])
    )
    model_response["usage"] = litellm.Usage(
        prompt_tokens=prompt_tokens,
--- a/litellm/proxy/_experimental/out/404.html
+++ b/litellm/proxy/_experimental/out/404.html
--- a/litellm/proxy/_experimental/out/_next/static/chunks/app/layout-ea657eeec2abf062.js
+++ b/litellm/proxy/_experimental/out/_next/static/chunks/app/layout-ea657eeec2abf062.js
@ -1 +1 @@
-(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_c23dc8', '__Inter_Fallback_c23dc8'",fontStyle:"normal"},className:"__className_c23dc8"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);
+(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{11837:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_c23dc8', '__Inter_Fallback_c23dc8'",fontStyle:"normal"},className:"__className_c23dc8"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=11837)}),_N_E=n.O()}]);
--- a/litellm/proxy/_experimental/out/_next/static/chunks/app/page-7f03ccc8529ada97.js
+++ b/litellm/proxy/_experimental/out/_next/static/chunks/app/page-7f03ccc8529ada97.js
--- a/litellm/proxy/_experimental/out/_next/static/chunks/app/page-992f4cdd1053ee86.js
+++ b/litellm/proxy/_experimental/out/_next/static/chunks/app/page-992f4cdd1053ee86.js
--- a/litellm/proxy/_experimental/out/_next/static/chunks/main-app-096338c8e1915716.js
+++ b/litellm/proxy/_experimental/out/_next/static/chunks/main-app-096338c8e1915716.js
@ -1 +1 @@
-(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);
+(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{70377:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(70377)}),_N_E=e.O()}]);
--- a/litellm/proxy/_experimental/out/_next/static/chunks/main-app-9b4fb13a7db53edf.js
+++ b/litellm/proxy/_experimental/out/_next/static/chunks/main-app-9b4fb13a7db53edf.js
@ -1 +0,0 @@
-(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);
--- a/litellm/proxy/_experimental/out/_next/static/p5gDwQBbgW8D3Uz3lgoZg/_buildManifest.js
+++ b/litellm/proxy/_experimental/out/_next/static/p5gDwQBbgW8D3Uz3lgoZg/_buildManifest.js
--- a/litellm/proxy/_experimental/out/_next/static/p5gDwQBbgW8D3Uz3lgoZg/_ssgManifest.js
+++ b/litellm/proxy/_experimental/out/_next/static/p5gDwQBbgW8D3Uz3lgoZg/_ssgManifest.js
--- a/litellm/proxy/_experimental/out/index.html
+++ b/litellm/proxy/_experimental/out/index.html
--- a/litellm/proxy/_experimental/out/index.txt
+++ b/litellm/proxy/_experimental/out/index.txt
@ -1,7 +1,7 @@
 2:"$Sreact.suspense"
-3:I[27680,["787","static/chunks/787-5bb33960644f5c7c.js","931","static/chunks/app/page-992f4cdd1053ee86.js"],""]
+3:I[27680,["787","static/chunks/787-5bb33960644f5c7c.js","931","static/chunks/app/page-7f03ccc8529ada97.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
-0:["lGjwnJSGwBqa476jHHI8W",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$2",null,{"fallback":["$","div",null,{"children":"Loading..."}],"children":["$","div",null,{"className":"flex min-h-screen flex-col ","children":["$","$L3",null,{}]}]}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be 
found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/a6a9860a7fe022a9.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
-6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"Create Next App"}],["$","meta","3",{"name":"description","content":"Generated by create next app"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
+0:["p5gDwQBbgW8D3Uz3lgoZg",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$2",null,{"fallback":["$","div",null,{"children":"Loading..."}],"children":["$","div",null,{"className":"flex min-h-screen flex-col ","children":["$","$L3",null,{}]}]}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be 
found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/a6a9860a7fe022a9.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
+6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@ -68,6 +68,7 @@ litellm_settings:
  fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
  success_callback: ['langfuse']
  max_budget: 10      # global budget for proxy 
+  max_user_budget: 0.0001
  budget_duration: 30d    # global budget duration, will reset after 30d
  default_key_generate_params:
    max_budget: 1.5000
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@ -91,6 +91,7 @@ from litellm.proxy.utils import (
    reset_budget,
    hash_token,
    html_form,
+    _read_request_body,
 )
 from litellm.proxy.secret_managers.google_kms import load_google_kms
 import pydantic
@ -322,6 +323,7 @@ async def user_api_key_auth(
                f"Malformed API Key passed in. Ensure Key has `Bearer ` prefix. Passed in: {passed_in_key}"
            )

+        ### CHECK IF ADMIN ###
        # note: never string compare api keys, this is vulenerable to a time attack. Use secrets.compare_digest instead
        is_master_key_valid = secrets.compare_digest(api_key, master_key)
        if is_master_key_valid:
@ -370,8 +372,9 @@ async def user_api_key_auth(
            # Run checks for
            # 1. If token can call model
            # 2. If user_id for this token is in budget
-            # 3. If token is expired
-            # 4. If token spend is under Budget for the token
+            # 3. If 'user' passed to /chat/completions, /embeddings endpoint is in budget
+            # 4. If token is expired
+            # 5. If token spend is under Budget for the token

            # Check 1. If token can call model
            litellm.model_alias_map = valid_token.aliases
@ -430,9 +433,18 @@ async def user_api_key_auth(
                )

            # Check 2. If user_id for this token is in budget
-            ## Check 2.5 If global proxy is in budget
+            ## Check 2.1 If global proxy is in budget
+            ## Check 2.2 [OPTIONAL - checked only if litellm.max_user_budget is not None] If 'user' passed in /chat/completions is in budget
            if valid_token.user_id is not None:
                user_id_list = [valid_token.user_id, litellm_proxy_budget_name]
+                if (
+                    litellm.max_user_budget is not None
+                ):  # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set
+                    request_data = await _read_request_body(request=request)
+                    user_passed_to_chat_completions = request_data.get("user", None)
+                    if user_passed_to_chat_completions is not None:
+                        user_id_list.append(user_passed_to_chat_completions)
+                
                user_id_information = None
                for id in user_id_list:
                    value = user_api_key_cache.get_cache(key=id)
@ -462,6 +474,7 @@ async def user_api_key_auth(
                            key=valid_token.user_id, table_name="user"
                        )
                
+
                verbose_proxy_logger.debug(
                    f"user_id_information: {user_id_information}"
                )
@ -473,12 +486,18 @@ async def user_api_key_auth(
                            if _user is None:
                                continue
                            assert isinstance(_user, dict)
+                            # check if user is admin #
+                            if (
+                                _user.get("user_role", None) is not None
+                                and _user.get("user_role") == "proxy_admin"
+                            ):
+                                return UserAPIKeyAuth(api_key=master_key)
                            # Token exists, not expired now check if its in budget for the user
                            user_max_budget = _user.get("max_budget", None)
                            user_current_spend = _user.get("spend", None)

                            verbose_proxy_logger.debug(
-                                f"user_max_budget: {user_max_budget}; user_current_spend: {user_current_spend}"
+                                f"user_id: {_user.get('user_id', None)}; user_max_budget: {user_max_budget}; user_current_spend: {user_current_spend}"
                            )

                            if (
@ -616,10 +635,13 @@ async def user_api_key_auth(
                    # check if user can access this route
                    query_params = request.query_params
                    user_id = query_params.get("user_id")
+                    verbose_proxy_logger.debug(
+                        f"user_id: {user_id} & valid_token.user_id: {valid_token.user_id}"
+                    )
                    if user_id != valid_token.user_id:
                        raise HTTPException(
                            status_code=status.HTTP_403_FORBIDDEN,
-                            detail="user not allowed to access this key's info",
+                            detail="key not allowed to access this user's info",
                        )
                elif route == "/user/update":
                    raise HTTPException(
@ -860,7 +882,6 @@ async def update_database(
            - Update that user's row
            - Update litellm-proxy-budget row (global proxy spend)
            """
-            try:
            user_ids = [user_id, litellm_proxy_budget_name]
            data_list = []
            for id in user_ids:
@ -868,15 +889,22 @@ async def update_database(
                    continue
                if prisma_client is not None:
                    existing_spend_obj = await prisma_client.get_data(user_id=id)
-                    elif (
-                        custom_db_client is not None and id != litellm_proxy_budget_name
-                    ):
+                elif custom_db_client is not None and id != litellm_proxy_budget_name:
                    existing_spend_obj = await custom_db_client.get_data(
                        key=id, table_name="user"
                    )
                verbose_proxy_logger.debug(
                    f"Updating existing_spend_obj: {existing_spend_obj}"
                )
+                if existing_spend_obj is None:
+                    # if user does not exist in LiteLLM_UserTable, create a new user
+                    existing_spend = 0
+                    max_user_budget = None
+                    if litellm.max_user_budget is not None:
+                        max_user_budget = litellm.max_user_budget
+                    existing_spend_obj = LiteLLM_UserTable(
+                        user_id=id, spend=0, max_budget=max_user_budget, user_email=None
+                    )
                    if existing_spend_obj is None:
                        existing_spend = 0
                        existing_spend_obj = LiteLLM_UserTable(
@ -1147,7 +1175,7 @@ class ProxyConfig:
        # load existing config
        config = await self.get_config()
        ## LITELLM MODULE SETTINGS (e.g. litellm.drop_params=True,..)
-        litellm_settings = config.get("litellm_settings", None)
+        litellm_settings = config.get("litellm_settings", {})
        all_teams_config = litellm_settings.get("default_team_settings", None)
        team_config: dict = {}
        if all_teams_config is None:
@ -1791,7 +1819,33 @@ async def async_data_generator(response, user_api_key_dict):
        done_message = "[DONE]"
        yield f"data: {done_message}\n\n"
    except Exception as e:
-        yield f"data: {str(e)}\n\n"
+        traceback.print_exc()
+        await proxy_logging_obj.post_call_failure_hook(
+            user_api_key_dict=user_api_key_dict, original_exception=e
+        )
+        verbose_proxy_logger.debug(
+            f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
+        )
+        router_model_names = (
+            [m["model_name"] for m in llm_model_list]
+            if llm_model_list is not None
+            else []
+        )
+        if user_debug:
+            traceback.print_exc()
+
+        if isinstance(e, HTTPException):
+            raise e
+        else:
+            error_traceback = traceback.format_exc()
+            error_msg = f"{str(e)}\n\n{error_traceback}"
+
+        raise ProxyException(
+            message=getattr(e, "message", error_msg),
+            type=getattr(e, "type", "None"),
+            param=getattr(e, "param", "None"),
+            code=getattr(e, "status_code", 500),
+        )


 def select_data_generator(response, user_api_key_dict):
@ -1799,7 +1853,7 @@ def select_data_generator(response, user_api_key_dict):
        # since boto3 - sagemaker does not support async calls, we should use a sync data_generator
        if hasattr(
            response, "custom_llm_provider"
-        ) and response.custom_llm_provider in ["sagemaker", "together_ai"]:
+        ) and response.custom_llm_provider in ["sagemaker"]:
            return data_generator(
                response=response,
            )
@ -1892,6 +1946,10 @@ async def startup_event():

    if prisma_client is not None and master_key is not None:
        # add master key to db
+        user_id = "default_user_id"
+        if os.getenv("PROXY_ADMIN_ID", None) is not None:
+            user_id = os.getenv("PROXY_ADMIN_ID")
+
        asyncio.create_task(
            generate_key_helper_fn(
                duration=None,
@ -1900,7 +1958,12 @@ async def startup_event():
                config={},
                spend=0,
                token=master_key,
-                user_id="default_user_id",
+                user_id=user_id,
+                user_role="proxy_admin",
+                query_type="update_data",
+                update_key_values={
+                    "user_role": "proxy_admin",
+                },
            )
        )

@ -2284,7 +2347,6 @@ async def chat_completion(
            selected_data_generator = select_data_generator(
                response=response, user_api_key_dict=user_api_key_dict
            )
-
            return StreamingResponse(
                selected_data_generator,
                media_type="text/event-stream",
@ -3459,23 +3521,38 @@ async def auth_callback(request: Request):
        result = await microsoft_sso.verify_and_process(request)

    # User is Authe'd in - generate key for the UI to access Proxy
-    user_id = getattr(result, "email", None)
+    user_email = getattr(result, "email", None)
+    user_id = getattr(result, "id", None)
    if user_id is None:
        user_id = getattr(result, "first_name", "") + getattr(result, "last_name", "")

    response = await generate_key_helper_fn(
-        **{"duration": "1hr", "key_max_budget": 0, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id, "team_id": "litellm-dashboard"}  # type: ignore
+        **{"duration": "1hr", "key_max_budget": 0, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id, "team_id": "litellm-dashboard", "user_email": user_email}  # type: ignore
    )
-
    key = response["token"]  # type: ignore
    user_id = response["user_id"]  # type: ignore

    litellm_dashboard_ui = "/ui/"

+    user_role = "app_owner"
+    if (
+        os.getenv("PROXY_ADMIN_ID", None) is not None
+        and os.environ["PROXY_ADMIN_ID"] == user_id
+    ):
+        # checks if user is admin
+        user_role = "app_admin"
+
    import jwt

    jwt_token = jwt.encode(
-        {"user_id": user_id, "key": key}, "secret", algorithm="HS256"
+        {
+            "user_id": user_id,
+            "key": key,
+            "user_email": user_email,
+            "user_role": user_role,
+        },
+        "secret",
+        algorithm="HS256",
    )
    litellm_dashboard_ui += "?userID=" + user_id + "&token=" + jwt_token

@ -3488,10 +3565,18 @@ async def auth_callback(request: Request):
    "/user/info", tags=["user management"], dependencies=[Depends(user_api_key_auth)]
 )
 async def user_info(
-    user_id: str = fastapi.Query(..., description="User ID in the request parameters")
+    user_id: Optional[str] = fastapi.Query(
+        default=None, description="User ID in the request parameters"
+    )
 ):
    """
    Use this to get user information. (user row + all user key info)
+
+    Example request
+    ```
+    curl -X GET 'http://localhost:8000/user/info?user_id=krrish7%40berri.ai' \
+    --header 'Authorization: Bearer sk-1234'
+    ```
    """
    global prisma_client
    try:
@ -3500,11 +3585,25 @@ async def user_info(
                f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )
        ## GET USER ROW ##
+        if user_id is not None:
            user_info = await prisma_client.get_data(user_id=user_id)
+        else:
+            user_info = None
        ## GET ALL KEYS ##
        keys = await prisma_client.get_data(
-            user_id=user_id, table_name="key", query_type="find_all"
+            user_id=user_id,
+            table_name="key",
+            query_type="find_all",
+            expires=datetime.now(),
        )
+
+        if user_info is None:
+            ## make sure we still return a total spend ##
+            spend = 0
+            for k in keys:
+                spend += getattr(k, "spend", 0)
+            user_info = {"spend": spend}
+
        ## REMOVE HASHED TOKEN INFO before returning ##
        for key in keys:
            try:
@ -4109,7 +4208,7 @@ async def health_readiness():
            cache_type = {"type": cache_type, "index_info": index_info}

    if prisma_client is not None:  # if db passed in, check if it's connected
-        if prisma_client.db.is_connected() == True:
+        await prisma_client.health_check()  # test the db connection
        response_object = {"db": "connected"}

        return {
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@ -149,6 +149,14 @@ class ProxyLogging:
        if request_data is not None:
            model = request_data.get("model", "")
            messages = request_data.get("messages", "")
+            trace_id = request_data.get("metadata", {}).get(
+                "trace_id", None
+            )  # get langfuse trace id
+            if trace_id is not None:
+                messages = str(messages)
+                messages = messages[:100]
+                messages = f"{messages}\nLangfuse Trace Id: {trace_id}"
+            else:
                # try casting messages to str and get the first 100 characters, else mark as None
                try:
                    messages = str(messages)
@ -466,8 +474,6 @@ class PrismaClient:
        reset_at: Optional[datetime] = None,
    ):
        try:
-            print_verbose("PrismaClient: get_data")
-
            response: Any = None
            if token is not None or (table_name is not None and table_name == "key"):
                # check if plain text or hash
@ -553,6 +559,17 @@ class PrismaClient:
                    # The asterisk before `user_id_list` unpacks the list into separate arguments
                    response = await self.db.query_raw(sql_query)
                elif query_type == "find_all":
+                    if expires is not None:
+                        response = await self.db.litellm_usertable.find_many(  # type: ignore
+                            order={"spend": "desc"},
+                            where={  # type:ignore
+                                "OR": [
+                                    {"expires": None},  # type:ignore
+                                    {"expires": {"gt": expires}},  # type:ignore
+                                ],
+                            },
+                        )
+                    else:
                        response = await self.db.litellm_usertable.find_many(  # type: ignore
                            order={"spend": "desc"},
                        )
@ -879,6 +896,21 @@ class PrismaClient:
            )
            raise e

+    async def health_check(self):
+        """
+        Health check endpoint for the prisma client
+        """
+        sql_query = """
+            SELECT 1
+            FROM "LiteLLM_VerificationToken"
+            LIMIT 1
+            """
+
+        # Probe the database with a minimal raw query (at most one row from the token table)
+        # The query takes no parameters; whatever query_raw returns is handed back unchanged
+        response = await self.db.query_raw(sql_query)
+        return response
+

 class DBClient:
    """
@ -1207,6 +1239,28 @@ async def reset_budget(prisma_client: PrismaClient):
            )


+async def _read_request_body(request):
+    """
+    Asynchronous function to read the request body and parse it as JSON or literal data.
+
+    Parameters:
+    - request: The request object to read the body from
+
+    Returns:
+    - dict: Parsed request data as a dictionary
+    """
+    import ast, json
+
+    request_data = {}
+    body = await request.body()  # raw payload; decoded to text on the next line
+    body_str = body.decode()
+    try:
+        request_data = ast.literal_eval(body_str)  # first attempt: parse the text as a Python literal
+    except:  # NOTE(review): bare except - any failure above, not only parse errors, falls back to JSON
+        request_data = json.loads(body_str)  # second attempt; an error raised here propagates to the caller
+    return request_data  # NOTE(review): only a dict if the body encodes one; other literals pass through as-is
+
+
 # LiteLLM Admin UI - Non SSO Login
 html_form = """
 <!DOCTYPE html>
--- a/litellm/tests/test_key_generate_dynamodb.py
+++ b/litellm/tests/test_key_generate_dynamodb.py
@ -513,4 +513,4 @@ def test_dynamo_db_migration(custom_db_client):

        asyncio.run(test())
    except Exception as e:
-        pytest.fail(f"An exception occurred - {str(e)}")
+        pytest.fail(f"An exception occurred - {traceback.format_exc()}")
--- a/litellm/tests/test_key_generate_prisma.py
+++ b/litellm/tests/test_key_generate_prisma.py
@ -322,6 +322,87 @@ def test_call_with_user_over_budget(prisma_client):
        print(vars(e))


+def test_call_with_end_user_over_budget(prisma_client):
+    # Test if a user passed to /chat/completions is tracked & fails when they cross their budget
+    # we only check this when litellm.max_user_budget is set
+    import random
+
+    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm, "max_user_budget", 0.00001)  # tiny budget; the response_cost of 10 logged below exceeds it
+    try:
+
+        async def test():
+            await litellm.proxy.proxy_server.prisma_client.connect()
+            request = GenerateKeyRequest()  # create a key with no budget
+            key = await new_user(request)
+            print(key)
+
+            generated_key = key.key
+            bearer_token = "Bearer " + generated_key
+            user = f"ishaan {random.randint(0, 10000)}"  # randomized end-user id for this run
+            request = Request(scope={"type": "http"})
+            request._url = URL(url="/chat/completions")
+
+            async def return_body():
+                return_string = f'{{"model": "gemini-pro-vision", "user": "{user}"}}'
+                # return string as bytes
+                return return_string.encode()
+
+            request.body = return_body  # stub the body reader so the request carries the "user" field
+
+            # update spend using track_cost callback, make 2nd request, it should fail
+            from litellm.proxy.proxy_server import (
+                _PROXY_track_cost_callback as track_cost_callback,
+            )
+            from litellm import ModelResponse, Choices, Message, Usage
+
+            resp = ModelResponse(
+                id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
+                choices=[
+                    Choices(
+                        finish_reason=None,
+                        index=0,
+                        message=Message(
+                            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
+                            role="assistant",
+                        ),
+                    )
+                ],
+                model="gpt-35-turbo",  # azure always has model written like this
+                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
+            )
+            await track_cost_callback(
+                kwargs={
+                    "stream": False,
+                    "litellm_params": {
+                        "metadata": {
+                            "user_api_key": generated_key,
+                            "user_api_key_user_id": user,
+                        },
+                        "proxy_server_request": {
+                            "user": user,
+                        },
+                    },
+                    "response_cost": 10,
+                },
+                completion_response=resp,
+                start_time=datetime.now(),
+                end_time=datetime.now(),
+            )
+            await asyncio.sleep(5)  # presumably gives the spend update time to be written - confirm
+            # use generated key to auth in
+            result = await user_api_key_auth(request=request, api_key=bearer_token)
+            print("result from user auth with new key", result)
+            pytest.fail(f"This should have failed!. They key crossed it's budget")
+
+        asyncio.run(test())
+    except Exception as e:  # expected path: the auth call above should raise
+        error_detail = e.message  # NOTE(review): assumes the raised exception exposes .message
+        assert "Authentication Error, ExceededBudget:" in error_detail
+        print(vars(e))
+
+
 def test_call_with_proxy_over_budget(prisma_client):
    # 5.1 Make a call with a proxy over budget, expect to fail
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -169,6 +169,8 @@ def map_finish_reason(
        return "stop"
    elif finish_reason == "SAFETY":  # vertex ai
        return "content_filter"
+    elif finish_reason == "STOP":  # vertex ai
+        return "stop"
    return finish_reason


@ -1305,7 +1307,7 @@ class Logging:
                        )
                    if callback == "langfuse":
                        global langFuseLogger
-                        verbose_logger.debug("reaches langfuse for logging!")
+                        verbose_logger.debug("reaches langfuse for success logging!")
                        kwargs = {}
                        for k, v in self.model_call_details.items():
                            if (
@ -6716,7 +6718,13 @@ def exception_type(
                        message=f"VertexAIException - {error_str}",
                        model=model,
                        llm_provider="vertex_ai",
-                        response=original_exception.response,
+                        response=httpx.Response(
+                            status_code=429,
+                            request=httpx.Request(
+                                method="POST",
+                                url=" https://cloud.google.com/vertex-ai/",
+                            ),
+                        ),
                    )
                elif (
                    "429 Quota exceeded" in error_str
@ -8351,13 +8359,20 @@ class CustomStreamWrapper:
                completion_obj["content"] = chunk.text
            elif self.custom_llm_provider and (self.custom_llm_provider == "vertex_ai"):
                try:
-                    # print(chunk)
-                    if hasattr(chunk, "text"):
-                        # vertexAI chunks return
-                        # MultiCandidateTextGenerationResponse(text=' ```python\n# This Python code says "Hi" 100 times.\n\n# Create', _prediction_response=Prediction(predictions=[{'candidates': [{'content': ' ```python\n# This Python code says "Hi" 100 times.\n\n# Create', 'author': '1'}], 'citationMetadata': [{'citations': None}], 'safetyAttributes': [{'blocked': False, 'scores': None, 'categories': None}]}], deployed_model_id='', model_version_id=None, model_resource_name=None, explanations=None), is_blocked=False, safety_attributes={}, candidates=[ ```python
-                        # This Python code says "Hi" 100 times.
-                        # Create])
+                    if hasattr(chunk, "candidates") == True:
+                        try:
                            completion_obj["content"] = chunk.text
+                            if hasattr(chunk.candidates[0], "finish_reason"):
+                                model_response.choices[
+                                    0
+                                ].finish_reason = map_finish_reason(
+                                    chunk.candidates[0].finish_reason.name
+                                )
+                        except:
+                            if chunk.candidates[0].finish_reason.name == "SAFETY":
+                                raise Exception(
+                                    f"The response was blocked by VertexAI. {str(chunk)}"
+                                )
                    else:
                        completion_obj["content"] = str(chunk)
                except StopIteration as e:
@ -8646,7 +8661,6 @@ class CustomStreamWrapper:
                or self.custom_llm_provider == "ollama_chat"
                or self.custom_llm_provider == "vertex_ai"
            ):
-                print_verbose(f"INSIDE ASYNC STREAMING!!!")
                print_verbose(
                    f"value of async completion stream: {self.completion_stream}"
                )
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.22.9"
+version = "1.22.11"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@ -69,7 +69,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.22.9"
+version = "1.22.11"
 version_files = [
    "pyproject.toml:^version"
 ]
--- a/ui/litellm-dashboard/out/404.html
+++ b/ui/litellm-dashboard/out/404.html
--- a/ui/litellm-dashboard/out/_next/static/chunks/app/layout-4d667c133e03c98b.js
+++ b/ui/litellm-dashboard/out/_next/static/chunks/app/layout-4d667c133e03c98b.js
@ -1 +0,0 @@
-(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_c23dc8', '__Inter_Fallback_c23dc8'",fontStyle:"normal"},className:"__className_c23dc8"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);
--- a/ui/litellm-dashboard/out/_next/static/chunks/app/page-992f4cdd1053ee86.js
+++ b/ui/litellm-dashboard/out/_next/static/chunks/app/page-992f4cdd1053ee86.js
--- a/ui/litellm-dashboard/out/_next/static/lGjwnJSGwBqa476jHHI8W/_buildManifest.js
+++ b/ui/litellm-dashboard/out/_next/static/lGjwnJSGwBqa476jHHI8W/_buildManifest.js
@ -1 +0,0 @@
-self.__BUILD_MANIFEST={__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/_error":["static/chunks/pages/_error-d6107f1aac0c574c.js"],sortedPages:["/_app","/_error"]},self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();
--- a/ui/litellm-dashboard/out/_next/static/lGjwnJSGwBqa476jHHI8W/_ssgManifest.js
+++ b/ui/litellm-dashboard/out/_next/static/lGjwnJSGwBqa476jHHI8W/_ssgManifest.js
@ -1 +0,0 @@
-self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()
--- a/ui/litellm-dashboard/out/index.html
+++ b/ui/litellm-dashboard/out/index.html
--- a/ui/litellm-dashboard/out/index.txt
+++ b/ui/litellm-dashboard/out/index.txt
@ -1,7 +1,7 @@
 2:"$Sreact.suspense"
-3:I[27680,["787","static/chunks/787-5bb33960644f5c7c.js","931","static/chunks/app/page-992f4cdd1053ee86.js"],""]
+3:I[27680,["787","static/chunks/787-5bb33960644f5c7c.js","931","static/chunks/app/page-7f03ccc8529ada97.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
-0:["lGjwnJSGwBqa476jHHI8W",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$2",null,{"fallback":["$","div",null,{"children":"Loading..."}],"children":["$","div",null,{"className":"flex min-h-screen flex-col ","children":["$","$L3",null,{}]}]}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be 
found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/a6a9860a7fe022a9.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
-6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"Create Next App"}],["$","meta","3",{"name":"description","content":"Generated by create next app"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
+0:["p5gDwQBbgW8D3Uz3lgoZg",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$2",null,{"fallback":["$","div",null,{"children":"Loading..."}],"children":["$","div",null,{"className":"flex min-h-screen flex-col ","children":["$","$L3",null,{}]}]}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be 
found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/a6a9860a7fe022a9.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
+6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null
--- a/ui/litellm-dashboard/src/components/navbar.tsx
+++ b/ui/litellm-dashboard/src/components/navbar.tsx
@ -1,40 +1,50 @@
 "use client";

-import Link from 'next/link';
-import Image from 'next/image'
-import React, { useState } from 'react';
+import Link from "next/link";
+import Image from "next/image";
+import React, { useState } from "react";
 import { useSearchParams } from "next/navigation";
-import { Button, Text, Metric,Title, TextInput, Grid, Col, Card } from "@tremor/react";
+import {
+  Button,
+  Text,
+  Metric,
+  Title,
+  TextInput,
+  Grid,
+  Col,
+  Card,
+} from "@tremor/react";

 // Define the props type
 interface NavbarProps {
  userID: string | null;
  userRole: string | null;
+  userEmail: string | null;
 }
-const Navbar: React.FC<NavbarProps> = ({ userID, userRole }) => {
+const Navbar: React.FC<NavbarProps> = ({ userID, userRole, userEmail }) => {
  console.log("User ID:", userID);
+  console.log("userEmail:", userEmail);

  return (
    <nav className="left-0 right-0 top-0 flex justify-between items-center h-12 mb-4">
      <div className="text-left mx-4 my-2 absolute top-0 left-0">
        <div className="flex flex-col items-center">
          <Link href="/">
-                    <button className="text-gray-800 text-2xl px-4 py-1 rounded text-center">🚅 LiteLLM</button>
+            <button className="text-gray-800 text-2xl px-4 py-1 rounded text-center">
+              🚅 LiteLLM
+            </button>
          </Link>
        </div>
      </div>
      <div className="text-right mx-4 my-2 absolute top-0 right-0">
-                <Button variant='secondary'>
-                {userID}
-                <p>
-                Role: {userRole}
-                </p>
-                
+        <Button variant="secondary">
+          {userEmail}
+          <p>Role: {userRole}</p>
+          <p>ID: {userID}</p>
        </Button>
      </div>
-            
    </nav>
-    )
-}
+  );
+};

 export default Navbar;
--- a/ui/litellm-dashboard/src/components/networking.tsx
+++ b/ui/litellm-dashboard/src/components/networking.tsx
@ -1,15 +1,15 @@
 /**
 * Helper file for calls being made to proxy
 */
-import { message } from 'antd';
+import { message } from "antd";

-const proxyBaseUrl = null;
-// const proxyBaseUrl = "http://localhost:4000" // http://localhost:4000
+const isLocal = process.env.NODE_ENV === "development";
+const proxyBaseUrl = isLocal ? "http://localhost:4000" : null;

 export const keyCreateCall = async (
  accessToken: string,
  userID: string,
-  formValues: Record<string, any>, // Assuming formValues is an object
+  formValues: Record<string, any> // Assuming formValues is an object
 ) => {
  try {
    console.log("Form Values in keyCreateCall:", formValues); // Log the form values before making the API call
@ -18,7 +18,7 @@ export const keyCreateCall = async (
    if (formValues.description) {
      // add to formValues.metadata
      if (!formValues.metadata) {
-        formValues.metadata = {}
+        formValues.metadata = {};
      }
      // value needs to be in "", valid JSON
      formValues.metadata.description = formValues.description;
@ -69,15 +69,11 @@ export const keyCreateCall = async (
  }
 };

-
-export const keyDeleteCall = async (
-  accessToken: String,
-  user_key: String
-) => {
+export const keyDeleteCall = async (accessToken: String, user_key: String) => {
  try {
    const url = proxyBaseUrl ? `${proxyBaseUrl}/key/delete` : `/key/delete`;
-    console.log("in keyDeleteCall:", user_key)
-    
+    console.log("in keyDeleteCall:", user_key);
+    message.info("Making key delete request");
    const response = await fetch(url, {
      method: "POST",
      headers: {
@ -108,21 +104,22 @@ export const keyDeleteCall = async (

 export const userInfoCall = async (
  accessToken: String,
-  userID: String
+  userID: String,
+  userRole: String
 ) => {
  try {
-    const url = proxyBaseUrl ? `${proxyBaseUrl}/user/info` : `/user/info`;
-    console.log("in userInfoCall:", url)
-    const response = await fetch(
-      `${url}/?user_id=${userID}`,
-      {
+    let url = proxyBaseUrl ? `${proxyBaseUrl}/user/info` : `/user/info`;
+    if (userRole == "App Owner") {
+      url = `${url}/?user_id=${userID}`;
+    }
+    message.info("Requesting user data");
+    const response = await fetch(url, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
-      }
-    );
+    });

    if (!response.ok) {
      const errorData = await response.text();
@ -131,7 +128,7 @@ export const userInfoCall = async (
    }

    const data = await response.json();
-    console.log(data);
+    message.info("Received user data");
    return data;
    // Handle success - you might want to update some state or UI based on the created key
  } catch (error) {
@ -140,24 +137,17 @@ export const userInfoCall = async (
  }
 };

-
-export const keySpendLogsCall = async (
-  accessToken: String,
-  token: String
-) => {
+export const keySpendLogsCall = async (accessToken: String, token: String) => {
  try {
    const url = proxyBaseUrl ? `${proxyBaseUrl}/spend/logs` : `/spend/logs`;
-    console.log("in keySpendLogsCall:", url)
-    const response = await fetch(
-      `${url}/?api_key=${token}`,
-      {
+    console.log("in keySpendLogsCall:", url);
+    const response = await fetch(`${url}/?api_key=${token}`, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
-      }
-    );
+    });
    if (!response.ok) {
      const errorData = await response.text();
      message.error(errorData);
@ -171,4 +161,4 @@ export const keySpendLogsCall = async (
    console.error("Failed to create key:", error);
    throw error;
  }
-}
+};
--- a/ui/litellm-dashboard/src/components/user_dashboard.tsx
+++ b/ui/litellm-dashboard/src/components/user_dashboard.tsx
@ -6,21 +6,25 @@ import CreateKey from "./create_key_button";
 import ViewKeyTable from "./view_key_table";
 import ViewUserSpend from "./view_user_spend";
 import EnterProxyUrl from "./enter_proxy_url";
+import { message } from "antd";
 import Navbar from "./navbar";
 import { useSearchParams } from "next/navigation";
 import { jwtDecode } from "jwt-decode";

-const proxyBaseUrl = null;
-// const proxyBaseUrl = "http://localhost:4000" // http://localhost:4000
+const isLocal = process.env.NODE_ENV === "development";
+console.log("isLocal:", isLocal);
+const proxyBaseUrl = isLocal ? "http://localhost:4000" : null;

 type UserSpendData = {
  spend: number;
  max_budget?: number | null;
-}
+};

 const UserDashboard = () => {
  const [data, setData] = useState<null | any[]>(null); // Keep the initialization of state here
-  const [userSpendData, setUserSpendData] = useState<UserSpendData | null>(null);
+  const [userSpendData, setUserSpendData] = useState<UserSpendData | null>(
+    null
+  );

  // Assuming useSearchParams() hook exists and works in your setup
  const searchParams = useSearchParams();
@ -30,19 +34,19 @@ const UserDashboard = () => {
  const token = searchParams.get("token");
  const [accessToken, setAccessToken] = useState<string | null>(null);
  const [userRole, setUserRole] = useState<string | null>(null);
-
+  const [userEmail, setUserEmail] = useState<string | null>(null);

  function formatUserRole(userRole: string) {
    if (!userRole) {
      return "Undefined Role";
    }
-  
+    console.log(`Received user role: ${userRole}`);
    switch (userRole.toLowerCase()) {
      case "app_owner":
        return "App Owner";
      case "demo_app_owner":
        return "App Owner";
-      case "admin":
+      case "app_admin":
        return "Admin";
      case "app_user":
        return "App User";
@ -71,17 +75,19 @@ const UserDashboard = () => {
        } else {
          console.log("User role not defined");
        }
+
+        if (decoded.user_email) {
+          setUserEmail(decoded.user_email);
+        } else {
+          console.log(`User Email is not set ${decoded}`);
        }
      }
-    if (userID && accessToken  && !data) {
+    }
+    if (userID && accessToken && userRole && !data) {
      const fetchData = async () => {
        try {
-          const response = await userInfoCall(
-            accessToken,
-            userID
-          );
-          console.log("Response:", response);
-          setUserSpendData(response["user_info"])
+          const response = await userInfoCall(accessToken, userID, userRole);
+          setUserSpendData(response["user_info"]);
          setData(response["keys"]); // Assuming this is the correct path to your data
        } catch (error) {
          console.error("There was an error fetching the data", error);
@ -93,35 +99,28 @@ const UserDashboard = () => {
  }, [userID, token, accessToken, data]);

  if (userID == null || token == null) {
-
-  
    // Now you can construct the full URL
-    const url = proxyBaseUrl ? `${proxyBaseUrl}/sso/key/generate` : `/sso/key/generate`;
+    const url = proxyBaseUrl
+      ? `${proxyBaseUrl}/sso/key/generate`
+      : `/sso/key/generate`;
    console.log("Full URL:", url);
    window.location.href = url;

    return null;
-  }
-  else if (accessToken == null) {
+  } else if (accessToken == null) {
    return null;
  }

  if (userRole == null) {
-    setUserRole("App Owner")
+    setUserRole("App Owner");
  }

  return (
    <div>
-      <Navbar
-        userID={userID}
-        userRole={userRole}
-      />
+      <Navbar userID={userID} userRole={userRole} userEmail={userEmail} />
      <Grid numItems={1} className="gap-0 p-10 h-[75vh] w-full">
        <Col numColSpan={1}>
-        <ViewUserSpend
-          userID={userID}
-          userSpendData={userSpendData}
-        />
+          <ViewUserSpend userID={userID} userSpendData={userSpendData} />
          <ViewKeyTable
            userID={userID}
            accessToken={accessToken}
@ -138,7 +137,6 @@ const UserDashboard = () => {
        </Col>
      </Grid>
    </div>
-
  );
 };

--- a/ui/litellm-dashboard/src/components/view_key_spend_report.tsx
+++ b/ui/litellm-dashboard/src/components/view_key_spend_report.tsx
@ -1,8 +1,26 @@
 "use client";

 import React, { useState, useEffect } from "react";
-import { Button as Button2, Modal, Form, Input, InputNumber, Select, message } from "antd";
-import { Button, Text, Card, Table, BarChart, Title, Subtitle, BarList, Metric  } from "@tremor/react";
+import {
+  Button as Button2,
+  Modal,
+  Form,
+  Input,
+  InputNumber,
+  Select,
+  message,
+} from "antd";
+import {
+  Button,
+  Text,
+  Card,
+  Table,
+  BarChart,
+  Title,
+  Subtitle,
+  BarList,
+  Metric,
+} from "@tremor/react";
 import { keySpendLogsCall } from "./networking";

 interface ViewKeySpendReportProps {
@ -19,13 +37,25 @@ type ResponseValueType = {
  user: string; // Assuming user is a string, adjust it if it's of a different type
 };

-const ViewKeySpendReport: React.FC<ViewKeySpendReportProps> = ({ token, accessToken, keySpend, keyBudget, keyName }) => {
+const ViewKeySpendReport: React.FC<ViewKeySpendReportProps> = ({
+  token,
+  accessToken,
+  keySpend,
+  keyBudget,
+  keyName,
+}) => {
  const [isModalVisible, setIsModalVisible] = useState(false);
-  const [data, setData] = useState<{ day: string; spend: number; }[] | null>(null);
-  const [userData, setUserData] = useState<{ name: string; value: number; }[] | null>(null);
+  const [data, setData] = useState<{ day: string; spend: number }[] | null>(
+    null
+  );
+  const [userData, setUserData] = useState<
+    { name: string; value: number }[] | null
+  >(null);

  const showModal = () => {
+    console.log("Show Modal triggered");
    setIsModalVisible(true);
+    fetchData();
  };

  const handleOk = () => {
@ -42,22 +72,29 @@ const ViewKeySpendReport: React.FC<ViewKeySpendReportProps> = ({ token, accessTo
      if (accessToken == null || token == null) {
        return;
      }
-      const response = await keySpendLogsCall(accessToken=accessToken, token=token);
+      console.log(`accessToken: ${accessToken}; token: ${token}`);
+      const response = await keySpendLogsCall(
+        (accessToken = accessToken),
+        (token = token)
+      );
      console.log("Response:", response);
      // loop through response
      // get spend, startTime for each element, place in new array

-
-      const pricePerDay: Record<string, number> = (Object.values(response) as ResponseValueType[]).reduce((acc: Record<string, number>, value) => {
+      const pricePerDay: Record<string, number> = (
+        Object.values(response) as ResponseValueType[]
+      ).reduce((acc: Record<string, number>, value) => {
        const startTime = new Date(value.startTime);
-        const day = new Intl.DateTimeFormat('en-US', { day: '2-digit', month: 'short' }).format(startTime);
+        const day = new Intl.DateTimeFormat("en-US", {
+          day: "2-digit",
+          month: "short",
+        }).format(startTime);

        acc[day] = (acc[day] || 0) + value.spend;

        return acc;
      }, {});

-      
      // sort pricePerDay by day
      // Convert object to array of key-value pairs
      const pricePerDayArray = Object.entries(pricePerDay);
@ -72,17 +109,17 @@ const ViewKeySpendReport: React.FC<ViewKeySpendReportProps> = ({ token, accessTo
      // Convert the sorted array back to an object
      const sortedPricePerDay = Object.fromEntries(pricePerDayArray);

-
      console.log(sortedPricePerDay);

-      const pricePerUser: Record<string, number> = (Object.values(response) as ResponseValueType[]).reduce((acc: Record<string, number>, value) => {
+      const pricePerUser: Record<string, number> = (
+        Object.values(response) as ResponseValueType[]
+      ).reduce((acc: Record<string, number>, value) => {
        const user = value.user;
        acc[user] = (acc[user] || 0) + value.spend;

        return acc;
      }, {});

-    
      console.log(pricePerDay);
      console.log(pricePerUser);

@ -97,11 +134,15 @@ const ViewKeySpendReport: React.FC<ViewKeySpendReportProps> = ({ token, accessTo
        arrayBarChart.push({ day: key, spend: value });
      }

-
      // get 5 most expensive users
-      const sortedUsers = Object.entries(pricePerUser).sort((a, b) => b[1] - a[1]);
+      const sortedUsers = Object.entries(pricePerUser).sort(
+        (a, b) => b[1] - a[1]
+      );
      const top5Users = sortedUsers.slice(0, 5);
-      const userChart = top5Users.map(([key, value]) => ({ name: key, value: value }));
+      const userChart = top5Users.map(([key, value]) => ({
+        name: key,
+        value: value,
+      }));

      setData(arrayBarChart);
      setUserData(userChart);
@ -112,11 +153,10 @@ const ViewKeySpendReport: React.FC<ViewKeySpendReportProps> = ({ token, accessTo
    }
  };

-  useEffect(() => {
-    // Fetch data only when the token changes
-    fetchData();
-  }, [token]);  // Dependency array containing the 'token' variable
-
+  // useEffect(() => {
+  //   // Fetch data only when the token changes
+  //   fetchData();
+  // }, [token]); // Dependency array containing the 'token' variable

  if (!token) {
    return null;
@ -134,7 +174,7 @@ const ViewKeySpendReport: React.FC<ViewKeySpendReportProps> = ({ token, accessTo
        onCancel={handleCancel}
        footer={null}
      >
-        <Title style={{ textAlign: 'left' }}>Key Name: {keyName}</Title>
+        <Title style={{ textAlign: "left" }}>Key Name: {keyName}</Title>

        <Metric>Monthly Spend ${keySpend}</Metric>

@ -153,14 +193,9 @@ const ViewKeySpendReport: React.FC<ViewKeySpendReportProps> = ({ token, accessTo
        <Title className="mt-6">Top 5 Users Spend (USD)</Title>
        <Card className="mb-6">
          {userData && (
-            <BarList
-            className="mt-6"
-            data={userData}
-            color="teal"
-            />
+            <BarList className="mt-6" data={userData} color="teal" />
          )}
        </Card>
-
      </Modal>
    </div>
  );
--- a/ui/litellm-dashboard/src/components/view_key_table.tsx
+++ b/ui/litellm-dashboard/src/components/view_key_table.tsx
@ -1,5 +1,5 @@
 "use client";
-import React, { useEffect } from "react";
+import React, { useEffect, useState } from "react";
 import { keyDeleteCall } from "./networking";
 import { StatusOnlineIcon, TrashIcon } from "@heroicons/react/outline";
 import {
@ -32,6 +32,8 @@ const ViewKeyTable: React.FC<ViewKeyTableProps> = ({
  data,
  setData,
 }) => {
+  const [isButtonClicked, setIsButtonClicked] = useState(false);
+
  const handleDelete = async (token: String) => {
    if (data == null) {
      return;
@ -116,8 +118,13 @@ const ViewKeyTable: React.FC<ViewKeyTableProps> = ({
                  />
                </TableCell>
                <TableCell>
-                  <ViewKeySpendReport token={item.token} accessToken={accessToken} keySpend={item.spend} keyBudget={item.max_budget} keyName={item.key_name} />
-                
+                  <ViewKeySpendReport
+                    token={item.token}
+                    accessToken={accessToken}
+                    keySpend={item.spend}
+                    keyBudget={item.max_budget}
+                    keyName={item.key_name}
+                  />
                </TableCell>
              </TableRow>
            );
				`@ -1 +0,0 @@`
				`(self.webpackChunk_N_E=self.webpackChunk_N_E\|\|[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);`
				`@ -1 +0,0 @@`
				`self.__BUILD_MANIFEST={__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/_error":["static/chunks/pages/_error-d6107f1aac0c574c.js"],sortedPages:["/_app","/_error"]},self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();`
				`@ -1 +0,0 @@`
				`self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()`