Merge branch 'BerriAI:main' into feature/watsonx-integration

Simon S. Viloria 2024-04-21 10:35:51 +02:00 committed by GitHub
commit a77537ddd4
45 changed files with 1027 additions and 281 deletions


@@ -279,7 +279,7 @@ router_settings:
```
</TabItem>
-<TabItem value="simple-shuffle" label="(Default) Weighted Pick">
+<TabItem value="simple-shuffle" label="(Default) Weighted Pick (Async)">

**Default** Picks a deployment based on the provided **Requests per minute (rpm) or Tokens per minute (tpm)**
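To make the weighted pick described above concrete, here is a minimal sketch of using the router with two rpm-weighted deployments. Model names, keys, and rpm values are placeholders, and provider API keys are assumed to be set in the environment; this is not part of the commit itself.

```python
from litellm import Router

# Two deployments of the same model group; with "simple-shuffle" the router
# makes a weighted random pick based on the declared rpm (~25% vs ~75% here).
model_list = [
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "gpt-3.5-turbo", "rpm": 100},
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "gpt-3.5-turbo", "rpm": 300},
    },
]

router = Router(model_list=model_list, routing_strategy="simple-shuffle")

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
)
```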


@@ -19,7 +19,7 @@ class PrometheusLogger:
        **kwargs,
    ):
        try:
-            verbose_logger.debug(f"in init prometheus metrics")
+            print(f"in init prometheus metrics")
            from prometheus_client import Counter

            self.litellm_llm_api_failed_requests_metric = Counter(


@@ -44,9 +44,18 @@ class PrometheusServicesLogger:
            )  # store the prometheus histogram/counter we need to call for each field in payload
            for service in self.services:
-                histogram = self.create_histogram(service)
-                counter = self.create_counter(service)
-                self.payload_to_prometheus_map[service] = [histogram, counter]
+                histogram = self.create_histogram(service, type_of_request="latency")
+                counter_failed_request = self.create_counter(
+                    service, type_of_request="failed_requests"
+                )
+                counter_total_requests = self.create_counter(
+                    service, type_of_request="total_requests"
+                )
+                self.payload_to_prometheus_map[service] = [
+                    histogram,
+                    counter_failed_request,
+                    counter_total_requests,
+                ]

            self.prometheus_to_amount_map: dict = (
                {}

@@ -74,26 +83,26 @@ class PrometheusServicesLogger:
                return metric
        return None

-    def create_histogram(self, label: str):
-        metric_name = "litellm_{}_latency".format(label)
+    def create_histogram(self, service: str, type_of_request: str):
+        metric_name = "litellm_{}_{}".format(service, type_of_request)
        is_registered = self.is_metric_registered(metric_name)
        if is_registered:
            return self.get_metric(metric_name)
        return self.Histogram(
            metric_name,
-            "Latency for {} service".format(label),
-            labelnames=[label],
+            "Latency for {} service".format(service),
+            labelnames=[service],
        )

-    def create_counter(self, label: str):
-        metric_name = "litellm_{}_failed_requests".format(label)
+    def create_counter(self, service: str, type_of_request: str):
+        metric_name = "litellm_{}_{}".format(service, type_of_request)
        is_registered = self.is_metric_registered(metric_name)
        if is_registered:
            return self.get_metric(metric_name)
        return self.Counter(
            metric_name,
-            "Total failed requests for {} service".format(label),
-            labelnames=[label],
+            "Total {} for {} service".format(type_of_request, service),
+            labelnames=[service],
        )

    def observe_histogram(

@@ -120,6 +129,8 @@ class PrometheusServicesLogger:
        if self.mock_testing:
            self.mock_testing_success_calls += 1

+        print(f"payload call type: {payload.call_type}")
        if payload.service.value in self.payload_to_prometheus_map:
            prom_objects = self.payload_to_prometheus_map[payload.service.value]
            for obj in prom_objects:

@@ -129,11 +140,19 @@ class PrometheusServicesLogger:
                        labels=payload.service.value,
                        amount=payload.duration,
                    )
+                elif isinstance(obj, self.Counter) and "total_requests" in obj._name:
+                    self.increment_counter(
+                        counter=obj,
+                        labels=payload.service.value,
+                        amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS
+                    )

    def service_failure_hook(self, payload: ServiceLoggerPayload):
        if self.mock_testing:
            self.mock_testing_failure_calls += 1

+        print(f"payload call type: {payload.call_type}")
        if payload.service.value in self.payload_to_prometheus_map:
            prom_objects = self.payload_to_prometheus_map[payload.service.value]
            for obj in prom_objects:

@@ -141,7 +160,7 @@ class PrometheusServicesLogger:
                    self.increment_counter(
                        counter=obj,
                        labels=payload.service.value,
-                        amount=1, # LOG ERROR COUNT TO PROMETHEUS
+                        amount=1, # LOG ERROR COUNT / TOTAL REQUESTS TO PROMETHEUS
                    )

    async def async_service_success_hook(self, payload: ServiceLoggerPayload):

@@ -151,6 +170,8 @@ class PrometheusServicesLogger:
        if self.mock_testing:
            self.mock_testing_success_calls += 1

+        print(f"payload call type: {payload.call_type}")
        if payload.service.value in self.payload_to_prometheus_map:
            prom_objects = self.payload_to_prometheus_map[payload.service.value]
            for obj in prom_objects:

@@ -160,12 +181,20 @@ class PrometheusServicesLogger:
                        labels=payload.service.value,
                        amount=payload.duration,
                    )
+                elif isinstance(obj, self.Counter) and "total_requests" in obj._name:
+                    self.increment_counter(
+                        counter=obj,
+                        labels=payload.service.value,
+                        amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS
+                    )

    async def async_service_failure_hook(self, payload: ServiceLoggerPayload):
        print(f"received error payload: {payload.error}")
        if self.mock_testing:
            self.mock_testing_failure_calls += 1

+        print(f"payload call type: {payload.call_type}")
        if payload.service.value in self.payload_to_prometheus_map:
            prom_objects = self.payload_to_prometheus_map[payload.service.value]
            for obj in prom_objects:
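As a reference for what the refactor above emits, here is a small standalone sketch using `prometheus_client` directly with the new `litellm_{service}_{type_of_request}` naming scheme. The service name `redis` and the latency value are illustrative assumptions, not taken from the commit.

```python
from prometheus_client import Counter, Histogram

service = "redis"  # illustrative service name

# One histogram + two counters per service, mirroring the naming in the diff
latency_histogram = Histogram(
    f"litellm_{service}_latency",
    f"Latency for {service} service",
    labelnames=[service],
)
failed_requests_counter = Counter(
    f"litellm_{service}_failed_requests",
    f"Total failed_requests for {service} service",
    labelnames=[service],
)
total_requests_counter = Counter(
    f"litellm_{service}_total_requests",
    f"Total total_requests for {service} service",
    labelnames=[service],
)

# On a successful call: observe latency and count the request
latency_histogram.labels(service).observe(0.042)
total_requests_counter.labels(service).inc()

# On a failed call: both counters are incremented (per the failure hook above)
failed_requests_counter.labels(service).inc()
total_requests_counter.labels(service).inc()
```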


@@ -507,10 +507,11 @@ def construct_tool_use_system_prompt(
):  # from https://github.com/anthropics/anthropic-cookbook/blob/main/function_calling/function_calling.ipynb
    tool_str_list = []
    for tool in tools:
+        tool_function = get_attribute_or_key(tool, "function")
        tool_str = construct_format_tool_for_claude_prompt(
-            tool["function"]["name"],
-            tool["function"].get("description", ""),
-            tool["function"].get("parameters", {}),
+            get_attribute_or_key(tool_function, "name"),
+            get_attribute_or_key(tool_function, "description", ""),
+            get_attribute_or_key(tool_function, "parameters", {}),
        )
        tool_str_list.append(tool_str)
    tool_use_system_prompt = (

@@ -634,7 +635,8 @@ def convert_to_anthropic_tool_result_xml(message: dict) -> str:
    </function_results>
    """
    name = message.get("name")
-    content = message.get("content")
+    content = message.get("content", "")
+    content = content.replace("<", "&lt;").replace(">", "&gt;").replace("&", "&amp;")

    # We can't determine from openai message format whether it's a successful or
    # error call result so default to the successful result template

@@ -655,13 +657,15 @@ def convert_to_anthropic_tool_result_xml(message: dict) -> str:
def convert_to_anthropic_tool_invoke_xml(tool_calls: list) -> str:
    invokes = ""
    for tool in tool_calls:
-        if tool["type"] != "function":
+        if get_attribute_or_key(tool, "type") != "function":
            continue

-        tool_name = tool["function"]["name"]
+        tool_function = get_attribute_or_key(tool,"function")
+        tool_name = get_attribute_or_key(tool_function, "name")
+        tool_arguments = get_attribute_or_key(tool_function, "arguments")
        parameters = "".join(
            f"<{param}>{val}</{param}>\n"
-            for param, val in json.loads(tool["function"]["arguments"]).items()
+            for param, val in json.loads(tool_arguments).items()
        )
        invokes += (
            "<invoke>\n"

@@ -715,7 +719,7 @@ def anthropic_messages_pt_xml(messages: list):
                        {
                            "type": "text",
                            "text": (
-                                convert_to_anthropic_tool_result(messages[msg_i])
+                                convert_to_anthropic_tool_result_xml(messages[msg_i])
                                if messages[msg_i]["role"] == "tool"
                                else messages[msg_i]["content"]
                            ),

@@ -736,7 +740,7 @@ def anthropic_messages_pt_xml(messages: list):
            if messages[msg_i].get(
                "tool_calls", []
            ):  # support assistant tool invoke convertion
-                assistant_text += convert_to_anthropic_tool_invoke(  # type: ignore
+                assistant_text += convert_to_anthropic_tool_invoke_xml(  # type: ignore
                    messages[msg_i]["tool_calls"]
                )

@@ -848,12 +852,12 @@ def convert_to_anthropic_tool_invoke(tool_calls: list) -> list:
    anthropic_tool_invoke = [
        {
            "type": "tool_use",
-            "id": tool["id"],
-            "name": tool["function"]["name"],
-            "input": json.loads(tool["function"]["arguments"]),
+            "id": get_attribute_or_key(tool, "id"),
+            "name": get_attribute_or_key(get_attribute_or_key(tool, "function"), "name"),
+            "input": json.loads(get_attribute_or_key(get_attribute_or_key(tool, "function"), "arguments")),
        }
        for tool in tool_calls
-        if tool["type"] == "function"
+        if get_attribute_or_key(tool, "type") == "function"
    ]

    return anthropic_tool_invoke

@@ -1074,7 +1078,8 @@ def cohere_message_pt(messages: list):
            tool_result = convert_openai_message_to_cohere_tool_result(message)
            tool_results.append(tool_result)
        else:
-            prompt += message["content"]
+            prompt += message["content"] + "\n\n"
+    prompt = prompt.rstrip()

    return prompt, tool_results

@@ -1414,3 +1419,8 @@ def prompt_factory(
        return default_pt(
            messages=messages
        )  # default that covers Bloom, T-5, any non-chat tuned model (e.g. base Llama2)
+
+
+def get_attribute_or_key(tool_or_function, attribute, default=None):
+    if hasattr(tool_or_function, attribute):
+        return getattr(tool_or_function, attribute)
+    return tool_or_function.get(attribute, default)
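The `get_attribute_or_key` helper added at the end of this file lets the Anthropic prompt helpers accept tool calls either as plain dicts (request format) or as object-style tool calls. A brief illustration; the sample objects below are made up for the example and are not from the commit.

```python
from types import SimpleNamespace

def get_attribute_or_key(tool_or_function, attribute, default=None):
    if hasattr(tool_or_function, attribute):
        return getattr(tool_or_function, attribute)
    return tool_or_function.get(attribute, default)

# dict-style tool call (OpenAI request format)
tool_as_dict = {"type": "function", "function": {"name": "get_weather"}}

# object-style tool call (e.g. an SDK response object)
tool_as_obj = SimpleNamespace(
    type="function", function=SimpleNamespace(name="get_weather")
)

for tool in (tool_as_dict, tool_as_obj):
    fn = get_attribute_or_key(tool, "function")
    print(get_attribute_or_key(fn, "name"))  # -> "get_weather" in both cases
```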


@@ -123,7 +123,7 @@ class VertexAIAnthropicConfig:
    """
    - Run client init
    - Support async completion, streaming
    """

@@ -236,17 +236,19 @@ def completion(
        if client is None:
            if vertex_credentials is not None and isinstance(vertex_credentials, str):
                import google.oauth2.service_account

-                json_obj = json.loads(vertex_credentials)
                creds = (
                    google.oauth2.service_account.Credentials.from_service_account_info(
-                        json_obj,
+                        json.loads(vertex_credentials),
                        scopes=["https://www.googleapis.com/auth/cloud-platform"],
                    )
                )
                ### CHECK IF ACCESS
                access_token = refresh_auth(credentials=creds)
+            else:
+                import google.auth
+
+                creds, _ = google.auth.default()
+                ### CHECK IF ACCESS
+                access_token = refresh_auth(credentials=creds)

            vertex_ai_client = AnthropicVertex(
                project_id=vertex_project,
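For context, a standalone sketch of the credential resolution the new `else:` branch enables: use an inline service-account JSON when `vertex_credentials` is provided, otherwise fall back to Application Default Credentials. The function name and structure here are illustrative, not the library's API.

```python
import json
from typing import Optional

import google.auth
import google.oauth2.service_account

def load_vertex_credentials(vertex_credentials: Optional[str]):
    """Return Google credentials for the AnthropicVertex client (sketch)."""
    if vertex_credentials is not None and isinstance(vertex_credentials, str):
        # Inline service-account JSON passed by the caller
        return google.oauth2.service_account.Credentials.from_service_account_info(
            json.loads(vertex_credentials),
            scopes=["https://www.googleapis.com/auth/cloud-platform"],
        )
    # Fall back to Application Default Credentials (gcloud, metadata server, ...)
    creds, _project = google.auth.default()
    return creds
```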


@@ -610,6 +610,7 @@ def completion(
            "client",
            "rpm",
            "tpm",
+            "max_parallel_requests",
            "input_cost_per_token",
            "output_cost_per_token",
            "input_cost_per_second",

@@ -2598,6 +2599,7 @@ def embedding(
    client = kwargs.pop("client", None)
    rpm = kwargs.pop("rpm", None)
    tpm = kwargs.pop("tpm", None)
+    max_parallel_requests = kwargs.pop("max_parallel_requests", None)
    model_info = kwargs.get("model_info", None)
    metadata = kwargs.get("metadata", None)
    encoding_format = kwargs.get("encoding_format", None)

@@ -2655,6 +2657,7 @@ def embedding(
        "client",
        "rpm",
        "tpm",
+        "max_parallel_requests",
        "input_cost_per_token",
        "output_cost_per_token",
        "input_cost_per_second",

@@ -3514,6 +3517,7 @@ def image_generation(
        "client",
        "rpm",
        "tpm",
+        "max_parallel_requests",
        "input_cost_per_token",
        "output_cost_per_token",
        "hf_model_name",

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1 +1 @@
(Prerendered "LiteLLM Dashboard" index.html: a single minified line of Next.js HTML, shown before and after. The only changes are the page chunk reference, static/chunks/app/page-93ac11fb17dce9d6.js -> static/chunks/app/page-6ba29bc4256320f4.js (module 16586 -> 65249), and the buildId, "Oe7aA-U7OV9Y13gspREJQ" -> "Vjlnu8AomhCFg4fkGtcUs".)


@@ -1,7 +1,7 @@
2:I[77831,[],""]
-3:I[16586,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-93ac11fb17dce9d6.js"],""]
+3:I[65249,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-6ba29bc4256320f4.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["Oe7aA-U7OV9Y13gspREJQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 0:["Vjlnu8AomhCFg4fkGtcUs",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null


@@ -4,14 +4,12 @@ model_list:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-      # api_base: http://0.0.0.0:8080
      stream_timeout: 0.001
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/my-fake-model-2
      api_key: my-fake-key
      api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-      # api_base: http://0.0.0.0:8080
      stream_timeout: 0.001
  - litellm_params:
      model: azure/chatgpt-v-2

@@ -30,13 +28,6 @@ model_list:
#     api_key: my-fake-key
#     api_base: https://exampleopenaiendpoint-production.up.railway.app/

-# litellm_settings:
-#   success_callback: ["prometheus"]
-#   failure_callback: ["prometheus"]
-#   service_callback: ["prometheus_system"]
-#   upperbound_key_generate_params:
-#     max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET

router_settings:
  routing_strategy: usage-based-routing-v2
  # redis_url: "os.environ/REDIS_URL"

@@ -48,6 +39,10 @@ router_settings:
litellm_settings:
  num_retries: 3 # retry call 3 times on each model_name
  allowed_fails: 3 # cooldown model if it fails > 1 call in a minute.
+  success_callback: ["prometheus"]
+  failure_callback: ["prometheus"]
+  service_callback: ["prometheus_system"]

general_settings:
  alerting: ["slack"]


@@ -87,6 +87,14 @@ class LiteLLMRoutes(enum.Enum):
        "/v2/key/info",
    ]

+    sso_only_routes: List = [
+        "/key/generate",
+        "/key/update",
+        "/key/delete",
+        "/global/spend/logs",
+        "/global/predict/spend/logs",
+    ]
+
    management_routes: List = [  # key
        "/key/generate",
        "/key/update",


@@ -1053,6 +1053,11 @@ async def user_api_key_auth(
                        status_code=status.HTTP_403_FORBIDDEN,
                        detail="key not allowed to access this team's info",
                    )
+                elif (
+                    _has_user_setup_sso()
+                    and route in LiteLLMRoutes.sso_only_routes.value
+                ):
+                    pass
                else:
                    raise Exception(
                        f"Only master key can be used to generate, delete, update info for new keys/users/teams. Route={route}"

@@ -1102,6 +1107,13 @@ async def user_api_key_auth(
                return UserAPIKeyAuth(
                    api_key=api_key, user_role="proxy_admin", **valid_token_dict
                )
+            elif (
+                _has_user_setup_sso()
+                and route in LiteLLMRoutes.sso_only_routes.value
+            ):
+                return UserAPIKeyAuth(
+                    api_key=api_key, user_role="app_owner", **valid_token_dict
+                )
            else:
                raise Exception(
                    f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"

@@ -5721,6 +5733,20 @@ async def new_user(data: NewUserRequest):
            "user"  # only create a user, don't create key if 'auto_create_key' set to False
        )
    response = await generate_key_helper_fn(**data_json)

+    # Admin UI Logic
+    # if team_id passed add this user to the team
+    if data_json.get("team_id", None) is not None:
+        await team_member_add(
+            data=TeamMemberAddRequest(
+                team_id=data_json.get("team_id", None),
+                member=Member(
+                    user_id=data_json.get("user_id", None),
+                    role="user",
+                    user_email=data_json.get("user_email", None),
+                ),
+            )
+        )
+
    return NewUserResponse(
        key=response.get("token", ""),
        expires=response.get("expires", None),

@@ -6526,13 +6552,20 @@ async def team_member_add(
    existing_team_row = await prisma_client.get_data(  # type: ignore
        team_id=data.team_id, table_name="team", query_type="find_unique"
    )
+    if existing_team_row is None:
+        raise HTTPException(
+            status_code=404,
+            detail={
+                "error": f"Team not found for team_id={getattr(data, 'team_id', None)}"
+            },
+        )

    new_member = data.member

    existing_team_row.members_with_roles.append(new_member)

    complete_team_data = LiteLLM_TeamTable(
-        **existing_team_row.model_dump(),
+        **_get_pydantic_json_dict(existing_team_row),
    )

    team_row = await prisma_client.update_data(
@@ -8120,36 +8153,33 @@ async def auth_callback(request: Request):
            }
            user_role = getattr(user_info, "user_role", None)
-        else:
-            ## check if user-email in db ##
-            user_info = await prisma_client.db.litellm_usertable.find_first(
-                where={"user_email": user_email}
-            )
-            if user_info is not None:
-                user_defined_values = {
-                    "models": getattr(user_info, "models", user_id_models),
-                    "user_id": getattr(user_info, "user_id", user_id),
-                    "user_email": getattr(user_info, "user_id", user_email),
-                    "user_role": getattr(user_info, "user_role", None),
-                }
-                user_role = getattr(user_info, "user_role", None)
-
-                # update id
-                await prisma_client.db.litellm_usertable.update_many(
-                    where={"user_email": user_email}, data={"user_id": user_id}  # type: ignore
-                )
-            elif litellm.default_user_params is not None and isinstance(
-                litellm.default_user_params, dict
-            ):
-                user_defined_values = {
-                    "models": litellm.default_user_params.get(
-                        "models", user_id_models
-                    ),
-                    "user_id": litellm.default_user_params.get("user_id", user_id),
-                    "user_email": litellm.default_user_params.get(
-                        "user_email", user_email
-                    ),
-                }
+        ## check if user-email in db ##
+        user_info = await prisma_client.db.litellm_usertable.find_first(
+            where={"user_email": user_email}
+        )
+        if user_info is not None:
+            user_defined_values = {
+                "models": getattr(user_info, "models", user_id_models),
+                "user_id": getattr(user_info, "user_id", user_id),
+                "user_email": getattr(user_info, "user_id", user_email),
+                "user_role": getattr(user_info, "user_role", None),
+            }
+            user_role = getattr(user_info, "user_role", None)
+
+            # update id
+            await prisma_client.db.litellm_usertable.update_many(
+                where={"user_email": user_email}, data={"user_id": user_id}  # type: ignore
+            )
+        elif litellm.default_user_params is not None and isinstance(
+            litellm.default_user_params, dict
+        ):
+            user_defined_values = {
+                "models": litellm.default_user_params.get("models", user_id_models),
+                "user_id": litellm.default_user_params.get("user_id", user_id),
+                "user_email": litellm.default_user_params.get(
+                    "user_email", user_email
+                ),
+            }
    except Exception as e:
        pass


@@ -238,7 +238,10 @@ class ProxyLogging:
        litellm_params = kwargs.get("litellm_params", {})
        model = kwargs.get("model", "")
        api_base = litellm.get_api_base(model=model, optional_params=litellm_params)
-        messages = kwargs.get("messages", "")
+        messages = kwargs.get("messages", None)
+        # if messages does not exist fallback to "input"
+        if messages is None:
+            messages = kwargs.get("input", None)

        # only use first 100 chars for alerting
        _messages = str(messages)[:100]

@@ -282,7 +285,10 @@ class ProxyLogging:
        ):
            if request_data is not None:
                model = request_data.get("model", "")
-                messages = request_data.get("messages", "")
+                messages = request_data.get("messages", None)
+                if messages is None:
+                    # if messages does not exist fallback to "input"
+                    messages = request_data.get("input", None)
                trace_id = request_data.get("metadata", {}).get(
                    "trace_id", None
                )  # get langfuse trace id
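The fallback above exists because chat requests carry a `messages` list while embedding requests carry `input`. A short sketch of the lookup pattern with made-up request bodies (the model names and contents are only illustrative):

```python
chat_request = {"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]}
embedding_request = {"model": "text-embedding-ada-002", "input": ["hi"]}

for request_data in (chat_request, embedding_request):
    messages = request_data.get("messages", None)
    if messages is None:
        # if messages does not exist fallback to "input"
        messages = request_data.get("input", None)
    print(str(messages)[:100])  # only the first 100 chars are used for alerting
```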


@@ -26,7 +26,12 @@ from litellm.llms.custom_httpx.azure_dall_e_2 import (
    CustomHTTPTransport,
    AsyncCustomHTTPTransport,
)
-from litellm.utils import ModelResponse, CustomStreamWrapper, get_utc_datetime
+from litellm.utils import (
+    ModelResponse,
+    CustomStreamWrapper,
+    get_utc_datetime,
+    calculate_max_parallel_requests,
+)
import copy
from litellm._logging import verbose_router_logger
import logging

@@ -61,6 +66,7 @@ class Router:
        num_retries: int = 0,
        timeout: Optional[float] = None,
        default_litellm_params={},  # default params for Router.chat.completion.create
+        default_max_parallel_requests: Optional[int] = None,
        set_verbose: bool = False,
        debug_level: Literal["DEBUG", "INFO"] = "INFO",
        fallbacks: List = [],

@@ -198,6 +204,7 @@ class Router:
        )  # use a dual cache (Redis+In-Memory) for tracking cooldowns, usage, etc.
        self.default_deployment = None  # use this to track the users default deployment, when they want to use model = *
+        self.default_max_parallel_requests = default_max_parallel_requests

        if model_list:
            model_list = copy.deepcopy(model_list)

@@ -213,6 +220,7 @@ class Router:
        )  # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown
        self.num_retries = num_retries or litellm.num_retries or 0
        self.timeout = timeout or litellm.request_timeout
        self.retry_after = retry_after
        self.routing_strategy = routing_strategy
        self.fallbacks = fallbacks or litellm.fallbacks

@@ -298,7 +306,7 @@ class Router:
        else:
            litellm.failure_callback = [self.deployment_callback_on_failure]
        verbose_router_logger.info(
-            f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}"
+            f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter Redis Caching={self.cache.redis_cache}"
        )
        self.routing_strategy_args = routing_strategy_args
@@ -496,7 +504,9 @@ class Router:
            )

            rpm_semaphore = self._get_client(
-                deployment=deployment, kwargs=kwargs, client_type="rpm_client"
+                deployment=deployment,
+                kwargs=kwargs,
+                client_type="max_parallel_requests",
            )

            if rpm_semaphore is not None and isinstance(

@@ -681,7 +691,9 @@ class Router:
            ### CONCURRENCY-SAFE RPM CHECKS ###
            rpm_semaphore = self._get_client(
-                deployment=deployment, kwargs=kwargs, client_type="rpm_client"
+                deployment=deployment,
+                kwargs=kwargs,
+                client_type="max_parallel_requests",
            )

            if rpm_semaphore is not None and isinstance(

@@ -803,7 +815,9 @@ class Router:
            ### CONCURRENCY-SAFE RPM CHECKS ###
            rpm_semaphore = self._get_client(
-                deployment=deployment, kwargs=kwargs, client_type="rpm_client"
+                deployment=deployment,
+                kwargs=kwargs,
+                client_type="max_parallel_requests",
            )

            if rpm_semaphore is not None and isinstance(

@@ -1049,7 +1063,9 @@ class Router:
            )

            rpm_semaphore = self._get_client(
-                deployment=deployment, kwargs=kwargs, client_type="rpm_client"
+                deployment=deployment,
+                kwargs=kwargs,
+                client_type="max_parallel_requests",
            )

            if rpm_semaphore is not None and isinstance(

@@ -1243,7 +1259,9 @@ class Router:
            ### CONCURRENCY-SAFE RPM CHECKS ###
            rpm_semaphore = self._get_client(
-                deployment=deployment, kwargs=kwargs, client_type="rpm_client"
+                deployment=deployment,
+                kwargs=kwargs,
+                client_type="max_parallel_requests",
            )

            if rpm_semaphore is not None and isinstance(
@@ -1862,17 +1880,23 @@ class Router:
            model_id = model["model_info"]["id"]
            # ### IF RPM SET - initialize a semaphore ###
            rpm = litellm_params.get("rpm", None)
-            if rpm:
-                semaphore = asyncio.Semaphore(rpm)
-                cache_key = f"{model_id}_rpm_client"
+            tpm = litellm_params.get("tpm", None)
+            max_parallel_requests = litellm_params.get("max_parallel_requests", None)
+            calculated_max_parallel_requests = calculate_max_parallel_requests(
+                rpm=rpm,
+                max_parallel_requests=max_parallel_requests,
+                tpm=tpm,
+                default_max_parallel_requests=self.default_max_parallel_requests,
+            )
+            if calculated_max_parallel_requests:
+                semaphore = asyncio.Semaphore(calculated_max_parallel_requests)
+                cache_key = f"{model_id}_max_parallel_requests_client"
                self.cache.set_cache(
                    key=cache_key,
                    value=semaphore,
                    local_only=True,
                )
-                # print("STORES SEMAPHORE IN CACHE")

            #### for OpenAI / Azure we need to initalize the Client for High Traffic ########
            custom_llm_provider = litellm_params.get("custom_llm_provider")
            custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""

@@ -2537,8 +2561,8 @@ class Router:
            The appropriate client based on the given client_type and kwargs.
        """
        model_id = deployment["model_info"]["id"]
-        if client_type == "rpm_client":
-            cache_key = "{}_rpm_client".format(model_id)
+        if client_type == "max_parallel_requests":
+            cache_key = "{}_max_parallel_requests_client".format(model_id)
            client = self.cache.get_cache(key=cache_key, local_only=True)
            return client
        elif client_type == "async":

@@ -2778,6 +2802,7 @@ class Router:
        """
        if (
            self.routing_strategy != "usage-based-routing-v2"
+            and self.routing_strategy != "simple-shuffle"
        ):  # prevent regressions for other routing strategies, that don't have async get available deployments implemented.
            return self.get_available_deployment(
                model=model,

@@ -2828,6 +2853,25 @@ class Router:
                messages=messages,
                input=input,
            )
+        elif self.routing_strategy == "simple-shuffle":
+            # if users pass rpm or tpm, we do a random weighted pick - based on rpm/tpm
+            ############## Check if we can do a RPM/TPM based weighted pick #################
+            rpm = healthy_deployments[0].get("litellm_params").get("rpm", None)
+            if rpm is not None:
+                # use weight-random pick if rpms provided
+                rpms = [m["litellm_params"].get("rpm", 0) for m in healthy_deployments]
+                verbose_router_logger.debug(f"\nrpms {rpms}")
+                total_rpm = sum(rpms)
+                weights = [rpm / total_rpm for rpm in rpms]
+                verbose_router_logger.debug(f"\n weights {weights}")
+                # Perform weighted random pick
+                selected_index = random.choices(range(len(rpms)), weights=weights)[0]
+                verbose_router_logger.debug(f"\n selected index, {selected_index}")
+                deployment = healthy_deployments[selected_index]
+                verbose_router_logger.info(
+                    f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}"
+                )
+                return deployment or deployment[0]

        if deployment is None:
            verbose_router_logger.info(
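Putting the router changes together: `max_parallel_requests` can now be set per deployment, with `default_max_parallel_requests` as the router-wide fallback used to size each deployment's semaphore. The usage sketch below mirrors the new unit test later in this commit; the model name and limit values are placeholders.

```python
import asyncio
from typing import Optional
import litellm

deployment = {
    "model_name": "gpt-3.5-turbo",
    "litellm_params": {
        "model": "gpt-3.5-turbo",
        "max_parallel_requests": 10,  # per-deployment cap (placeholder value)
    },
    "model_info": {"id": "my-unique-id"},
}

router = litellm.Router(
    model_list=[deployment],
    default_max_parallel_requests=50,  # router-wide fallback (placeholder value)
)

# Retrieve the semaphore the router initialized for this deployment
mpr_semaphore: Optional[asyncio.Semaphore] = router._get_client(
    deployment=deployment,
    kwargs={},
    client_type="max_parallel_requests",
)
print(mpr_semaphore._value)  # -> 10, per the order mpr > rpm > tpm/1000/6 > default
```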


@@ -407,13 +407,15 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
            tpm_keys.append(tpm_key)
            rpm_keys.append(rpm_key)

-        tpm_values = await self.router_cache.async_batch_get_cache(
-            keys=tpm_keys
-        )  # [1, 2, None, ..]
-        rpm_values = await self.router_cache.async_batch_get_cache(
-            keys=rpm_keys
-        )  # [1, 2, None, ..]
+        combined_tpm_rpm_keys = tpm_keys + rpm_keys
+
+        combined_tpm_rpm_values = await self.router_cache.async_batch_get_cache(
+            keys=combined_tpm_rpm_keys
+        )  # [1, 2, None, ..]
+
+        tpm_values = combined_tpm_rpm_values[: len(tpm_keys)]
+        rpm_values = combined_tpm_rpm_values[len(tpm_keys) :]

        return self._common_checks_available_deployment(
            model_group=model_group,
            healthy_deployments=healthy_deployments,


@@ -269,6 +269,30 @@ def test_bedrock_claude_3_tool_calling():
        assert isinstance(
            response.choices[0].message.tool_calls[0].function.arguments, str
        )
+        messages.append(
+            response.choices[0].message.model_dump()
+        )  # Add assistant tool invokes
+        tool_result = (
+            '{"location": "Boston", "temperature": "72", "unit": "fahrenheit"}'
+        )
+        # Add user submitted tool results in the OpenAI format
+        messages.append(
+            {
+                "tool_call_id": response.choices[0].message.tool_calls[0].id,
+                "role": "tool",
+                "name": response.choices[0].message.tool_calls[0].function.name,
+                "content": tool_result,
+            }
+        )
+        # In the second response, Claude should deduce answer from tool results
+        second_response = completion(
+            model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+            messages=messages,
+            tools=tools,
+            tool_choice="auto",
+        )
+        print(f"second response: {second_response}")
+        assert isinstance(second_response.choices[0].message.content, str)
    except RateLimitError:
        pass
    except Exception as e:


@@ -120,6 +120,15 @@ async def test_new_user_response(prisma_client):
        await litellm.proxy.proxy_server.prisma_client.connect()
        from litellm.proxy.proxy_server import user_api_key_cache

+        await new_team(
+            NewTeamRequest(
+                team_id="ishaan-special-team",
+            ),
+            user_api_key_dict=UserAPIKeyAuth(
+                user_role="proxy_admin", api_key="sk-1234", user_id="1234"
+            ),
+        )
+
        _response = await new_user(
            data=NewUserRequest(
                models=["azure-gpt-3.5"],

@@ -999,10 +1008,32 @@ def test_generate_and_update_key(prisma_client):
    async def test():
        await litellm.proxy.proxy_server.prisma_client.connect()

+        # create team "litellm-core-infra@gmail.com""
+        print("creating team litellm-core-infra@gmail.com")
+        await new_team(
+            NewTeamRequest(
+                team_id="litellm-core-infra@gmail.com",
+            ),
+            user_api_key_dict=UserAPIKeyAuth(
+                user_role="proxy_admin", api_key="sk-1234", user_id="1234"
+            ),
+        )
+
+        await new_team(
+            NewTeamRequest(
+                team_id="ishaan-special-team",
+            ),
+            user_api_key_dict=UserAPIKeyAuth(
+                user_role="proxy_admin", api_key="sk-1234", user_id="1234"
+            ),
+        )
+
        request = NewUserRequest(
-            metadata={"team": "litellm-team3", "project": "litellm-project3"},
+            metadata={"project": "litellm-project3"},
            team_id="litellm-core-infra@gmail.com",
        )
        key = await new_user(request)
        print(key)

@@ -1015,7 +1046,6 @@ def test_generate_and_update_key(prisma_client):
        print("\n info for key=", result["info"])
        assert result["info"]["max_parallel_requests"] == None
        assert result["info"]["metadata"] == {
-            "team": "litellm-team3",
            "project": "litellm-project3",
        }
        assert result["info"]["team_id"] == "litellm-core-infra@gmail.com"

@@ -1037,7 +1067,7 @@ def test_generate_and_update_key(prisma_client):
        # update the team id
        response2 = await update_key_fn(
            request=Request,
-            data=UpdateKeyRequest(key=generated_key, team_id="ishaan"),
+            data=UpdateKeyRequest(key=generated_key, team_id="ishaan-special-team"),
        )
        print("response2=", response2)

@@ -1048,11 +1078,10 @@ def test_generate_and_update_key(prisma_client):
        print("\n info for key=", result["info"])
        assert result["info"]["max_parallel_requests"] == None
        assert result["info"]["metadata"] == {
-            "team": "litellm-team3",
            "project": "litellm-project3",
        }
        assert result["info"]["models"] == ["ada", "babbage", "curie", "davinci"]
-        assert result["info"]["team_id"] == "ishaan"
+        assert result["info"]["team_id"] == "ishaan-special-team"

        # cleanup - delete key
        delete_key_request = KeyRequest(keys=[generated_key])

@@ -1941,6 +1970,15 @@ async def test_master_key_hashing(prisma_client):
        await litellm.proxy.proxy_server.prisma_client.connect()
        from litellm.proxy.proxy_server import user_api_key_cache

+        await new_team(
+            NewTeamRequest(
+                team_id="ishaans-special-team",
+            ),
+            user_api_key_dict=UserAPIKeyAuth(
+                user_role="proxy_admin", api_key="sk-1234", user_id="1234"
+            ),
+        )
+
        _response = await new_user(
            data=NewUserRequest(
                models=["azure-gpt-3.5"],


@@ -81,7 +81,7 @@ def test_async_fallbacks(caplog):
    # Define the expected log messages
    # - error request, falling back notice, success notice
    expected_logs = [
-        "Intialized router with Routing strategy: simple-shuffle\n\nRouting fallbacks: [{'gpt-3.5-turbo': ['azure/gpt-3.5-turbo']}]\n\nRouting context window fallbacks: None",
+        "Intialized router with Routing strategy: simple-shuffle\n\nRouting fallbacks: [{'gpt-3.5-turbo': ['azure/gpt-3.5-turbo']}]\n\nRouting context window fallbacks: None\n\nRouter Redis Caching=None",
        "litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m",
        "Falling back to model_group = azure/gpt-3.5-turbo",
        "litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",


@ -0,0 +1,115 @@
# What is this?
## Unit tests for the max_parallel_requests feature on Router
import sys, os, time, inspect, asyncio, traceback
from datetime import datetime
import pytest
sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.utils import calculate_max_parallel_requests
from typing import Optional
"""
- only rpm
- only tpm
- only max_parallel_requests
- max_parallel_requests + rpm
- max_parallel_requests + tpm
- max_parallel_requests + tpm + rpm
"""
max_parallel_requests_values = [None, 10]
tpm_values = [None, 20, 300000]
rpm_values = [None, 30]
default_max_parallel_requests = [None, 40]
@pytest.mark.parametrize(
"max_parallel_requests, tpm, rpm, default_max_parallel_requests",
[
(mp, tp, rp, dmp)
for mp in max_parallel_requests_values
for tp in tpm_values
for rp in rpm_values
for dmp in default_max_parallel_requests
],
)
def test_scenario(max_parallel_requests, tpm, rpm, default_max_parallel_requests):
calculated_max_parallel_requests = calculate_max_parallel_requests(
max_parallel_requests=max_parallel_requests,
rpm=rpm,
tpm=tpm,
default_max_parallel_requests=default_max_parallel_requests,
)
if max_parallel_requests is not None:
assert max_parallel_requests == calculated_max_parallel_requests
elif rpm is not None:
assert rpm == calculated_max_parallel_requests
elif tpm is not None:
calculated_rpm = int(tpm / 1000 / 6)
if calculated_rpm == 0:
calculated_rpm = 1
print(
f"test calculated_rpm: {calculated_rpm}, calculated_max_parallel_requests={calculated_max_parallel_requests}"
)
assert calculated_rpm == calculated_max_parallel_requests
elif default_max_parallel_requests is not None:
assert calculated_max_parallel_requests == default_max_parallel_requests
else:
assert calculated_max_parallel_requests is None
@pytest.mark.parametrize(
"max_parallel_requests, tpm, rpm, default_max_parallel_requests",
[
(mp, tp, rp, dmp)
for mp in max_parallel_requests_values
for tp in tpm_values
for rp in rpm_values
for dmp in default_max_parallel_requests
],
)
def test_setting_mpr_limits_per_model(
max_parallel_requests, tpm, rpm, default_max_parallel_requests
):
deployment = {
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo",
"max_parallel_requests": max_parallel_requests,
"tpm": tpm,
"rpm": rpm,
},
"model_info": {"id": "my-unique-id"},
}
router = litellm.Router(
model_list=[deployment],
default_max_parallel_requests=default_max_parallel_requests,
)
mpr_client: Optional[asyncio.Semaphore] = router._get_client(
deployment=deployment,
kwargs={},
client_type="max_parallel_requests",
)
if max_parallel_requests is not None:
assert max_parallel_requests == mpr_client._value
elif rpm is not None:
assert rpm == mpr_client._value
elif tpm is not None:
calculated_rpm = int(tpm / 1000 / 6)
if calculated_rpm == 0:
calculated_rpm = 1
print(
f"test calculated_rpm: {calculated_rpm}, calculated_max_parallel_requests={mpr_client._value}"
)
assert calculated_rpm == mpr_client._value
elif default_max_parallel_requests is not None:
assert mpr_client._value == default_max_parallel_requests
else:
assert mpr_client is None
# raise Exception("it worked!")
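# Illustrative sketch (not part of this diff): the router sizes an asyncio.Semaphore
# with the value computed by calculate_max_parallel_requests, and that semaphore is
# what bounds concurrency (see mpr_client._value above). A minimal stand-alone
# version of the idea, reusing the imports at the top of this file:
async def _gated_call(i: int, sem: asyncio.Semaphore):
    async with sem:
        # at most `limit` of these coroutines run this section concurrently
        await asyncio.sleep(0.01)
        return i


async def _demo_semaphore():
    limit = calculate_max_parallel_requests(
        max_parallel_requests=None, rpm=None, tpm=300000, default_max_parallel_requests=None
    )  # -> 50 with the formula above
    sem = asyncio.Semaphore(limit)
    results = await asyncio.gather(*[_gated_call(i, sem) for i in range(100)])
    assert len(results) == 100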

View file

@ -5434,6 +5434,49 @@ def get_optional_params(
return optional_params return optional_params
def calculate_max_parallel_requests(
max_parallel_requests: Optional[int],
rpm: Optional[int],
tpm: Optional[int],
default_max_parallel_requests: Optional[int],
) -> Optional[int]:
"""
Returns the max parallel requests to send to a deployment.
Used to size the semaphore that gates async requests on the router.
Parameters:
- max_parallel_requests - Optional[int] - max_parallel_requests allowed for that deployment
- rpm - Optional[int] - requests per minute allowed for that deployment
- tpm - Optional[int] - tokens per minute allowed for that deployment
- default_max_parallel_requests - Optional[int] - default_max_parallel_requests allowed for any deployment
Returns:
- int or None (if all params are None)
Order:
max_parallel_requests > rpm > tpm / 1000 / 6 (Azure formula) > default_max_parallel_requests
Azure RPM formula:
6 rpm per 1000 TPM
https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits
"""
if max_parallel_requests is not None:
return max_parallel_requests
elif rpm is not None:
return rpm
elif tpm is not None:
calculated_rpm = int(tpm / 1000 / 6)
if calculated_rpm == 0:
calculated_rpm = 1
return calculated_rpm
elif default_max_parallel_requests is not None:
return default_max_parallel_requests
return None
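# Illustrative sketch (not part of this diff): how the precedence documented above
# resolves, using the same values the unit tests exercise.
assert calculate_max_parallel_requests(max_parallel_requests=10, rpm=30, tpm=300000, default_max_parallel_requests=40) == 10
assert calculate_max_parallel_requests(max_parallel_requests=None, rpm=30, tpm=300000, default_max_parallel_requests=40) == 30
assert calculate_max_parallel_requests(max_parallel_requests=None, rpm=None, tpm=300000, default_max_parallel_requests=40) == 50  # int(300000 / 1000 / 6)
assert calculate_max_parallel_requests(max_parallel_requests=None, rpm=None, tpm=20, default_max_parallel_requests=40) == 1  # rounded up from 0
assert calculate_max_parallel_requests(max_parallel_requests=None, rpm=None, tpm=None, default_max_parallel_requests=40) == 40
assert calculate_max_parallel_requests(max_parallel_requests=None, rpm=None, tpm=None, default_max_parallel_requests=None) is None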
def get_api_base(model: str, optional_params: dict) -> Optional[str]: def get_api_base(model: str, optional_params: dict) -> Optional[str]:
""" """
Returns the api base used for calling the model. Returns the api base used for calling the model.

View file

@ -96,9 +96,9 @@ litellm_settings:
router_settings: router_settings:
routing_strategy: usage-based-routing-v2 routing_strategy: usage-based-routing-v2
redis_host: os.environ/REDIS_HOST # redis_host: os.environ/REDIS_HOST
redis_password: os.environ/REDIS_PASSWORD # redis_password: os.environ/REDIS_PASSWORD
redis_port: os.environ/REDIS_PORT # redis_port: os.environ/REDIS_PORT
enable_pre_call_checks: true enable_pre_call_checks: true
general_settings: general_settings:

View file

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "litellm" name = "litellm"
version = "1.35.17" version = "1.35.18"
description = "Library to easily interface with LLM API providers" description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"] authors = ["BerriAI"]
license = "MIT" license = "MIT"
@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"
[tool.commitizen] [tool.commitizen]
version = "1.35.17" version = "1.35.18"
version_files = [ version_files = [
"pyproject.toml:^version" "pyproject.toml:^version"
] ]

View file

@ -14,6 +14,24 @@ sys.path.insert(
import litellm import litellm
async def generate_team(session):
url = "http://0.0.0.0:4000/team/new"
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
data = {
"team_id": "litellm-dashboard",
}
async with session.post(url, headers=headers, json=data) as response:
status = response.status
response_text = await response.text()
print(f"Response (Status code: {status}):")
print(response_text)
print()
_json_response = await response.json()
return _json_response
async def generate_user( async def generate_user(
session, session,
user_role="app_owner", user_role="app_owner",
@ -668,7 +686,7 @@ async def test_key_rate_limit():
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_key_delete(): async def test_key_delete_ui():
""" """
Admin UI flow - DO NOT DELETE Admin UI flow - DO NOT DELETE
-> Create a key with user_id = "ishaan" -> Create a key with user_id = "ishaan"
@ -680,6 +698,8 @@ async def test_key_delete():
key = key_gen["key"] key = key_gen["key"]
# generate a admin UI key # generate a admin UI key
team = await generate_team(session=session)
print("generated team: ", team)
admin_ui_key = await generate_user(session=session, user_role="proxy_admin") admin_ui_key = await generate_user(session=session, user_role="proxy_admin")
print( print(
"trying to delete key=", "trying to delete key=",

View file

@ -260,7 +260,10 @@ async def test_chat_completion_ratelimit():
await asyncio.gather(*tasks) await asyncio.gather(*tasks)
pytest.fail("Expected at least 1 call to fail") pytest.fail("Expected at least 1 call to fail")
except Exception as e: except Exception as e:
pass if "Request did not return a 200 status code: 429" in str(e):
pass
else:
pytest.fail(f"Wrong error received - {str(e)}")
@pytest.mark.asyncio @pytest.mark.asyncio

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[16586,[\"968\",\"static/chunks/968-0cc23fee51b47e4e.js\",\"931\",\"static/chunks/app/page-93ac11fb17dce9d6.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"Oe7aA-U7OV9Y13gspREJQ\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin="" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[65249,[\"968\",\"static/chunks/968-0cc23fee51b47e4e.js\",\"931\",\"static/chunks/app/page-6ba29bc4256320f4.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"Vjlnu8AomhCFg4fkGtcUs\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin 
UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[16586,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-93ac11fb17dce9d6.js"],""] 3:I[65249,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-6ba29bc4256320f4.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["Oe7aA-U7OV9Y13gspREJQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 0:["Vjlnu8AomhCFg4fkGtcUs",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -9,6 +9,7 @@ import Teams from "@/components/teams";
import AdminPanel from "@/components/admins"; import AdminPanel from "@/components/admins";
import Settings from "@/components/settings"; import Settings from "@/components/settings";
import GeneralSettings from "@/components/general_settings"; import GeneralSettings from "@/components/general_settings";
import APIRef from "@/components/api_ref";
import ChatUI from "@/components/chat_ui"; import ChatUI from "@/components/chat_ui";
import Sidebar from "../components/leftnav"; import Sidebar from "../components/leftnav";
import Usage from "../components/usage"; import Usage from "../components/usage";
@ -165,6 +166,8 @@ const CreateKeyPage = () => {
accessToken={accessToken} accessToken={accessToken}
showSSOBanner={showSSOBanner} showSSOBanner={showSSOBanner}
/> />
) : page == "api_ref" ? (
<APIRef/>
) : page == "settings" ? ( ) : page == "settings" ? (
<Settings <Settings
userID={userID} userID={userID}

View file

@ -0,0 +1,152 @@
"use client";
import React, { useEffect, useState } from "react";
import {
Badge,
Card,
Table,
Metric,
TableBody,
TableCell,
TableHead,
TableHeaderCell,
TableRow,
Text,
Title,
Icon,
Accordion,
AccordionBody,
AccordionHeader,
List,
ListItem,
Tab,
TabGroup,
TabList,
TabPanel,
TabPanels,
Grid,
} from "@tremor/react";
import { Statistic } from "antd"
import { modelAvailableCall } from "./networking";
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
const APIRef = ({}) => {
return (
<>
<Grid className="gap-2 p-8 h-[80vh] w-full mt-2">
<div className="mb-5">
<p className="text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">OpenAI Compatible Proxy: API Reference</p>
<Text className="mt-2 mb-2">LiteLLM is OpenAI Compatible. This means your API Key works with the OpenAI SDK. Just replace the base_url to point to your litellm proxy. Example Below </Text>
<TabGroup>
<TabList>
<Tab>OpenAI Python SDK</Tab>
<Tab>LlamaIndex</Tab>
<Tab>Langchain Py</Tab>
</TabList>
<TabPanels>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import openai
client = openai.OpenAI(
api_key="your_api_key",
base_url="http://0.0.0.0:4000" # LiteLLM Proxy is OpenAI compatible, Read More: https://docs.litellm.ai/docs/proxy/user_keys
)
response = client.chat.completions.create(
model="gpt-3.5-turbo", # model to send to the proxy
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
]
)
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import os, dotenv
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
llm = AzureOpenAI(
engine="azure-gpt-3.5", # model_name on litellm proxy
temperature=0.0,
azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint
api_key="sk-1234", # litellm proxy API Key
api_version="2023-07-01-preview",
)
embed_model = AzureOpenAIEmbedding(
deployment_name="azure-embedding-model",
azure_endpoint="http://0.0.0.0:4000",
api_key="sk-1234",
api_version="2023-07-01-preview",
)
documents = SimpleDirectoryReader("llama_index_data").load_data()
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
chat = ChatOpenAI(
openai_api_base="http://0.0.0.0:4000",
model = "gpt-3.5-turbo",
temperature=0.1
)
messages = [
SystemMessage(
content="You are a helpful assistant that im using to make a test request to."
),
HumanMessage(
content="test from litellm. tell me why it's amazing in 1 sentence"
),
]
response = chat(messages)
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
</TabPanels>
</TabGroup>
</div>
</Grid>
</>
)
}
export default APIRef;

View file

@ -13,12 +13,12 @@ import {
TabGroup, TabGroup,
TabList, TabList,
TabPanel, TabPanel,
TabPanels,
Metric, Metric,
Col, Col,
Text, Text,
SelectItem, SelectItem,
TextInput, TextInput,
TabPanels,
Button, Button,
} from "@tremor/react"; } from "@tremor/react";
@ -201,7 +201,6 @@ const ChatUI: React.FC<ChatUIProps> = ({
<TabGroup> <TabGroup>
<TabList> <TabList>
<Tab>Chat</Tab> <Tab>Chat</Tab>
<Tab>API Reference</Tab>
</TabList> </TabList>
<TabPanels> <TabPanels>
@ -272,124 +271,7 @@ const ChatUI: React.FC<ChatUIProps> = ({
</div> </div>
</div> </div>
</TabPanel> </TabPanel>
<TabPanel>
<TabGroup>
<TabList>
<Tab>OpenAI Python SDK</Tab>
<Tab>LlamaIndex</Tab>
<Tab>Langchain Py</Tab>
</TabList>
<TabPanels>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import openai
client = openai.OpenAI(
api_key="your_api_key",
base_url="http://0.0.0.0:4000" # proxy base url
)
response = client.chat.completions.create(
model="gpt-3.5-turbo", # model to use from Models Tab
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
extra_body={
"metadata": {
"generation_name": "ishaan-generation-openai-client",
"generation_id": "openai-client-gen-id22",
"trace_id": "openai-client-trace-id22",
"trace_user_id": "openai-client-user-id2"
}
}
)
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import os, dotenv
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
llm = AzureOpenAI(
engine="azure-gpt-3.5", # model_name on litellm proxy
temperature=0.0,
azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint
api_key="sk-1234", # litellm proxy API Key
api_version="2023-07-01-preview",
)
embed_model = AzureOpenAIEmbedding(
deployment_name="azure-embedding-model",
azure_endpoint="http://0.0.0.0:4000",
api_key="sk-1234",
api_version="2023-07-01-preview",
)
documents = SimpleDirectoryReader("llama_index_data").load_data()
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
chat = ChatOpenAI(
openai_api_base="http://0.0.0.0:8000",
model = "gpt-3.5-turbo",
temperature=0.1,
extra_body={
"metadata": {
"generation_name": "ishaan-generation-langchain-client",
"generation_id": "langchain-client-gen-id22",
"trace_id": "langchain-client-trace-id22",
"trace_user_id": "langchain-client-user-id2"
}
}
)
messages = [
SystemMessage(
content="You are a helpful assistant that im using to make a test request to."
),
HumanMessage(
content="test from litellm. tell me why it's amazing in 1 sentence"
),
]
response = chat(messages)
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
</TabPanels>
</TabGroup>
</TabPanel>
</TabPanels> </TabPanels>
</TabGroup> </TabGroup>
</Card> </Card>

View file

@ -2,7 +2,7 @@
import React, { useState, useEffect, useRef } from "react"; import React, { useState, useEffect, useRef } from "react";
import { Button, TextInput, Grid, Col } from "@tremor/react"; import { Button, TextInput, Grid, Col } from "@tremor/react";
import { Card, Metric, Text, Title, Subtitle } from "@tremor/react"; import { Card, Metric, Text, Title, Subtitle, Accordion, AccordionHeader, AccordionBody, } from "@tremor/react";
import { CopyToClipboard } from 'react-copy-to-clipboard'; import { CopyToClipboard } from 'react-copy-to-clipboard';
import { import {
Button as Button2, Button as Button2,
@ -147,6 +147,17 @@ const CreateKey: React.FC<CreateKeyProps> = ({
mode="multiple" mode="multiple"
placeholder="Select models" placeholder="Select models"
style={{ width: "100%" }} style={{ width: "100%" }}
onChange={(values) => {
// Check if "All Team Models" is selected
const isAllTeamModelsSelected = values.includes("all-team-models");
// If "All Team Models" is selected, deselect all other models
if (isAllTeamModelsSelected) {
const newValues = ["all-team-models"];
// You can call the form's setFieldsValue method to update the value
form.setFieldsValue({ models: newValues });
}
}}
> >
<Option key="all-team-models" value="all-team-models"> <Option key="all-team-models" value="all-team-models">
All Team Models All Team Models
@ -248,16 +259,153 @@ const CreateKey: React.FC<CreateKeyProps> = ({
</> </>
) : ( ) : (
<> <>
<Form.Item label="Key Name" name="key_alias"> <Form.Item
label="Key Name"
name="key_alias"
rules={[{ required: true, message: 'Please input a key name' }]}
help="required"
>
<Input /> <Input />
</Form.Item> </Form.Item>
<Form.Item label="Team ID (Contact Group)" name="team_id"> <Form.Item
<Input placeholder="default team (create a new team)" /> label="Team ID"
name="team_id"
hidden={true}
initialValue={team ? team["team_id"] : null}
valuePropName="team_id"
className="mt-8"
>
<Input value={team ? team["team_alias"] : ""} disabled />
</Form.Item> </Form.Item>
<Form.Item label="Description" name="description"> <Form.Item
<Input.TextArea placeholder="Enter description" rows={4} /> label="Models"
name="models"
className="mb-12"
rules={[{ required: true, message: 'Please select a model' }]}
help="required"
>
<Select
mode="multiple"
placeholder="Select models"
style={{ width: "100%" }}
onChange={(values) => {
const isAllTeamModelsSelected = values.includes("all-team-models");
if (isAllTeamModelsSelected) {
const newValues = ["all-team-models"];
form.setFieldsValue({ models: newValues });
}
}}
>
<Option key="all-team-models" value="all-team-models">
All Team Models
</Option>
{team && team.models ? (
team.models.includes("all-proxy-models") ? (
userModels.map((model: string) => (
(
<Option key={model} value={model}>
{model}
</Option>
)
))
) : (
team.models.map((model: string) => (
<Option key={model} value={model}>
{model}
</Option>
))
)
) : (
userModels.map((model: string) => (
<Option key={model} value={model}>
{model}
</Option>
))
)}
</Select>
</Form.Item> </Form.Item>
<Accordion className="mt-20 mb-8" >
<AccordionHeader>
<b>Optional Settings</b>
</AccordionHeader>
<AccordionBody>
<Form.Item
className="mt-8"
label="Max Budget (USD)"
name="max_budget"
help={`Budget cannot exceed team max budget: $${team?.max_budget !== null && team?.max_budget !== undefined ? team?.max_budget : 'unlimited'}`}
rules={[
{
validator: async (_, value) => {
if (value && team && team.max_budget !== null && value > team.max_budget) {
throw new Error(`Budget cannot exceed team max budget: $${team.max_budget}`);
}
},
},
]}
>
<InputNumber step={0.01} precision={2} width={200} />
</Form.Item>
<Form.Item
className="mt-8"
label="Reset Budget"
name="budget_duration"
help={`Team Reset Budget: ${team?.budget_duration !== null && team?.budget_duration !== undefined ? team?.budget_duration : 'None'}`}
>
<Select defaultValue={null} placeholder="n/a">
<Select.Option value="24h">daily</Select.Option>
<Select.Option value="30d">monthly</Select.Option>
</Select>
</Form.Item>
<Form.Item
className="mt-8"
label="Tokens per minute Limit (TPM)"
name="tpm_limit"
help={`TPM cannot exceed team TPM limit: ${team?.tpm_limit !== null && team?.tpm_limit !== undefined ? team?.tpm_limit : 'unlimited'}`}
rules={[
{
validator: async (_, value) => {
if (value && team && team.tpm_limit !== null && value > team.tpm_limit) {
throw new Error(`TPM limit cannot exceed team TPM limit: ${team.tpm_limit}`);
}
},
},
]}
>
<InputNumber step={1} width={400} />
</Form.Item>
<Form.Item
className="mt-8"
label="Requests per minute Limit (RPM)"
name="rpm_limit"
help={`RPM cannot exceed team RPM limit: ${team?.rpm_limit !== null && team?.rpm_limit !== undefined ? team?.rpm_limit : 'unlimited'}`}
rules={[
{
validator: async (_, value) => {
if (value && team && team.rpm_limit !== null && value > team.rpm_limit) {
throw new Error(`RPM limit cannot exceed team RPM limit: ${team.rpm_limit}`);
}
},
},
]}
>
<InputNumber step={1} width={400} />
</Form.Item>
<Form.Item label="Expire Key (eg: 30s, 30h, 30d)" name="duration" className="mt-8">
<Input />
</Form.Item>
<Form.Item label="Metadata" name="metadata">
<Input.TextArea rows={4} placeholder="Enter metadata as JSON" />
</Form.Item>
</AccordionBody>
</Accordion>
</> </>
)} )}
<div style={{ textAlign: "right", marginTop: "10px" }}> <div style={{ textAlign: "right", marginTop: "10px" }}>

View file

@ -4,6 +4,7 @@ import { Select, SelectItem, Text, Title } from "@tremor/react";
interface DashboardTeamProps { interface DashboardTeamProps {
teams: Object[] | null; teams: Object[] | null;
setSelectedTeam: React.Dispatch<React.SetStateAction<any | null>>; setSelectedTeam: React.Dispatch<React.SetStateAction<any | null>>;
userRole: string | null;
} }
type TeamInterface = { type TeamInterface = {
@ -15,6 +16,7 @@ type TeamInterface = {
const DashboardTeam: React.FC<DashboardTeamProps> = ({ const DashboardTeam: React.FC<DashboardTeamProps> = ({
teams, teams,
setSelectedTeam, setSelectedTeam,
userRole,
}) => { }) => {
const defaultTeam: TeamInterface = { const defaultTeam: TeamInterface = {
models: [], models: [],
@ -25,19 +27,26 @@ const DashboardTeam: React.FC<DashboardTeamProps> = ({
const [value, setValue] = useState(defaultTeam); const [value, setValue] = useState(defaultTeam);
const updatedTeams = teams ? [...teams, defaultTeam] : [defaultTeam]; let updatedTeams;
if (userRole === "App User") {
// Non-Admin SSO users should only see their own team - they should not see "Default Team"
updatedTeams = teams;
} else {
updatedTeams = teams ? [...teams, defaultTeam] : [defaultTeam];
}
if (userRole === 'App User') return null;
return ( return (
<div className="mt-5 mb-5"> <div className="mt-5 mb-5">
<Title>Select Team</Title> <Title>Select Team</Title>
<Text> <Text>
If you belong to multiple teams, this setting controls which team is If you belong to multiple teams, this setting controls which team is used by default when creating new API Keys.
used by default when creating new API Keys.
</Text> </Text>
<Text className="mt-3 mb-3"> <Text className="mt-3 mb-3">
<b>Default Team:</b> If no team_id is set for a key, it will be grouped under here. <b>Default Team:</b> If no team_id is set for a key, it will be grouped under here.
</Text> </Text>
{updatedTeams && updatedTeams.length > 0 ? ( {updatedTeams && updatedTeams.length > 0 ? (
<Select defaultValue="0"> <Select defaultValue="0">
{updatedTeams.map((team: any, index) => ( {updatedTeams.map((team: any, index) => (

View file

@ -46,8 +46,8 @@ const Sidebar: React.FC<SidebarProps> = ({
); );
} }
return ( return (
<Layout style={{ minHeight: "100vh", maxWidth: "120px" }}> <Layout style={{ minHeight: "100vh", maxWidth: "130px" }}>
<Sider width={120}> <Sider width={130}>
<Menu <Menu
mode="inline" mode="inline"
defaultSelectedKeys={defaultSelectedKey ? defaultSelectedKey : ["1"]} defaultSelectedKeys={defaultSelectedKey ? defaultSelectedKey : ["1"]}
@ -63,11 +63,23 @@ const Sidebar: React.FC<SidebarProps> = ({
Test Key Test Key
</Text> </Text>
</Menu.Item> </Menu.Item>
<Menu.Item key="2" onClick={() => setPage("models")}>
<Text> <Menu.Item key="11" onClick={() => setPage("api_ref")}>
Models <Text>
</Text> API Reference
</Menu.Item> </Text>
</Menu.Item>
{
userRole == "Admin" ? (
<Menu.Item key="2" onClick={() => setPage("models")}>
<Text>
Models
</Text>
</Menu.Item>
) : null
}
{userRole == "Admin" ? ( {userRole == "Admin" ? (
<Menu.Item key="6" onClick={() => setPage("teams")}> <Menu.Item key="6" onClick={() => setPage("teams")}>
<Text> <Text>
@ -75,11 +87,18 @@ const Sidebar: React.FC<SidebarProps> = ({
</Text> </Text>
</Menu.Item> </Menu.Item>
) : null} ) : null}
<Menu.Item key="4" onClick={() => setPage("usage")}>
<Text> {
Usage userRole == "Admin" ? (
</Text> <Menu.Item key="4" onClick={() => setPage("usage")}>
</Menu.Item> <Text>
Usage
</Text>
</Menu.Item>
) : null
}
{userRole == "Admin" ? ( {userRole == "Admin" ? (
<Menu.Item key="5" onClick={() => setPage("users")}> <Menu.Item key="5" onClick={() => setPage("users")}>
<Text> <Text>
@ -87,16 +106,27 @@ const Sidebar: React.FC<SidebarProps> = ({
</Text> </Text>
</Menu.Item> </Menu.Item>
) : null} ) : null}
<Menu.Item key="8" onClick={() => setPage("settings")}>
<Text> {
Integrations userRole == "Admin" ? (
</Text> <Menu.Item key="8" onClick={() => setPage("settings")}>
</Menu.Item> <Text>
<Menu.Item key="9" onClick={() => setPage("general-settings")}> Integrations
<Text> </Text>
Settings </Menu.Item>
</Text> ) : null
</Menu.Item> }
{
userRole == "Admin" ? (
<Menu.Item key="9" onClick={() => setPage("general-settings")}>
<Text>
Settings
</Text>
</Menu.Item>
) : null
}
{userRole == "Admin" ? ( {userRole == "Admin" ? (
<Menu.Item key="7" onClick={() => setPage("admin-panel")}> <Menu.Item key="7" onClick={() => setPage("admin-panel")}>
<Text> <Text>

View file

@ -296,6 +296,9 @@ export const userInfoCall = async (
if (userRole == "App Owner" && userID) { if (userRole == "App Owner" && userID) {
url = `${url}?user_id=${userID}`; url = `${url}?user_id=${userID}`;
} }
if (userRole == "App User" && userID) {
url = `${url}?user_id=${userID}`;
}
console.log("in userInfoCall viewAll=", viewAll); console.log("in userInfoCall viewAll=", viewAll);
if (viewAll && page_size && (page != null) && (page != undefined)) { if (viewAll && page_size && (page != null) && (page != undefined)) {
url = `${url}?view_all=true&page=${page}&page_size=${page_size}`; url = `${url}?view_all=true&page=${page}&page_size=${page_size}`;

View file

@ -5,6 +5,7 @@ import { Grid, Col, Card, Text, Title } from "@tremor/react";
import CreateKey from "./create_key_button"; import CreateKey from "./create_key_button";
import ViewKeyTable from "./view_key_table"; import ViewKeyTable from "./view_key_table";
import ViewUserSpend from "./view_user_spend"; import ViewUserSpend from "./view_user_spend";
import ViewUserTeam from "./view_user_team";
import DashboardTeam from "./dashboard_default_team"; import DashboardTeam from "./dashboard_default_team";
import { useSearchParams, useRouter } from "next/navigation"; import { useSearchParams, useRouter } from "next/navigation";
import { jwtDecode } from "jwt-decode"; import { jwtDecode } from "jwt-decode";
@ -232,11 +233,19 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
<div className="w-full mx-4"> <div className="w-full mx-4">
<Grid numItems={1} className="gap-2 p-8 h-[75vh] w-full mt-2"> <Grid numItems={1} className="gap-2 p-8 h-[75vh] w-full mt-2">
<Col numColSpan={1}> <Col numColSpan={1}>
<ViewUserTeam
userID={userID}
userRole={userRole}
selectedTeam={selectedTeam ? selectedTeam : null}
accessToken={accessToken}
/>
<ViewUserSpend <ViewUserSpend
userID={userID} userID={userID}
userRole={userRole} userRole={userRole}
accessToken={accessToken} accessToken={accessToken}
userSpend={teamSpend} userSpend={teamSpend}
selectedTeam = {selectedTeam ? selectedTeam : null}
/> />
<ViewKeyTable <ViewKeyTable
@ -257,7 +266,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
data={keys} data={keys}
setData={setKeys} setData={setKeys}
/> />
<DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} /> <DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} userRole={userRole}/>
</Col> </Col>
</Grid> </Grid>
</div> </div>

View file

@ -2,7 +2,7 @@
import React, { useEffect, useState } from "react"; import React, { useEffect, useState } from "react";
import { keyDeleteCall, getTotalSpendCall } from "./networking"; import { keyDeleteCall, getTotalSpendCall } from "./networking";
import { StatusOnlineIcon, TrashIcon } from "@heroicons/react/outline"; import { StatusOnlineIcon, TrashIcon } from "@heroicons/react/outline";
import { DonutChart } from "@tremor/react"; import { Accordion, AccordionHeader, AccordionList, DonutChart } from "@tremor/react";
import { import {
Badge, Badge,
Card, Card,
@ -16,9 +16,13 @@ import {
Text, Text,
Title, Title,
Icon, Icon,
AccordionBody,
List,
ListItem,
} from "@tremor/react"; } from "@tremor/react";
import { Statistic } from "antd" import { Statistic } from "antd"
import { spendUsersCall } from "./networking"; import { spendUsersCall, modelAvailableCall } from "./networking";
// Define the props type // Define the props type
@ -32,11 +36,13 @@ interface ViewUserSpendProps {
userRole: string | null; userRole: string | null;
accessToken: string | null; accessToken: string | null;
userSpend: number | null; userSpend: number | null;
selectedTeam: any | null;
} }
const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessToken, userSpend }) => { const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessToken, userSpend, selectedTeam }) => {
console.log(`userSpend: ${userSpend}`) console.log(`userSpend: ${userSpend}`)
let [spend, setSpend] = useState(userSpend !== null ? userSpend : 0.0); let [spend, setSpend] = useState(userSpend !== null ? userSpend : 0.0);
const [maxBudget, setMaxBudget] = useState(0.0); const [maxBudget, setMaxBudget] = useState(0.0);
const [userModels, setUserModels] = useState([]);
useEffect(() => { useEffect(() => {
const fetchData = async () => { const fetchData = async () => {
if (!accessToken || !userID || !userRole) { if (!accessToken || !userID || !userRole) {
@ -62,9 +68,30 @@ const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessT
} }
} }
}; };
const fetchUserModels = async () => {
try {
if (userID === null || userRole === null) {
return;
}
if (accessToken !== null) {
const model_available = await modelAvailableCall(accessToken, userID, userRole);
let available_model_names = model_available["data"].map(
(element: { id: string }) => element.id
);
console.log("available_model_names:", available_model_names);
setUserModels(available_model_names);
}
} catch (error) {
console.error("Error fetching user models:", error);
}
};
fetchUserModels();
fetchData(); fetchData();
}, [userRole, accessToken]); }, [userRole, accessToken, userID]);
useEffect(() => { useEffect(() => {
if (userSpend !== null) { if (userSpend !== null) {
@ -72,18 +99,50 @@ const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessT
} }
}, [userSpend]) }, [userSpend])
// logic to decide what models to display
let modelsToDisplay = [];
if (selectedTeam && selectedTeam.models) {
modelsToDisplay = selectedTeam.models;
}
// check if "all-proxy-models" is in modelsToDisplay
if (modelsToDisplay && modelsToDisplay.includes("all-proxy-models")) {
console.log("user models:", userModels);
modelsToDisplay = userModels;
}
const displayMaxBudget = maxBudget !== null ? `$${maxBudget} limit` : "No limit"; const displayMaxBudget = maxBudget !== null ? `$${maxBudget} limit` : "No limit";
const roundedSpend = spend !== undefined ? spend.toFixed(4) : null; const roundedSpend = spend !== undefined ? spend.toFixed(4) : null;
console.log(`spend in view user spend: ${spend}`) console.log(`spend in view user spend: ${spend}`)
return ( return (
<> <div className="flex items-center">
<p className="text-tremor-default text-tremor-content dark:text-dark-tremor-content">Total Spend </p> <div>
<p className="text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">${roundedSpend}</p> <p className="text-tremor-default text-tremor-content dark:text-dark-tremor-content">
Total Spend{" "}
</> </p>
) <p className="text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">
${roundedSpend}
</p>
</div>
<div className="ml-auto">
<Accordion>
<AccordionHeader>Models</AccordionHeader>
<AccordionBody className="absolute right-0 z-10 bg-white p-2 shadow-lg max-w-xs">
<List>
{modelsToDisplay.map((model: string) => (
<ListItem key={model}>
<Text>{model}</Text>
</ListItem>
))}
</List>
</AccordionBody>
</Accordion>
</div>
</div>
);
} }
export default ViewUserSpend; export default ViewUserSpend;

View file

@ -0,0 +1,78 @@
"use client";
import React, { useEffect, useState } from "react";
import {
Badge,
Card,
Table,
Metric,
TableBody,
TableCell,
TableHead,
TableHeaderCell,
TableRow,
Text,
Title,
Icon,
Accordion,
AccordionBody,
AccordionHeader,
List,
ListItem,
} from "@tremor/react";
import { Statistic } from "antd"
import { modelAvailableCall } from "./networking";
interface ViewUserTeamProps {
userID: string | null;
userRole: string | null;
selectedTeam: any | null;
accessToken: string | null;
}
const ViewUserTeam: React.FC<ViewUserTeamProps> = ({ userID, userRole, selectedTeam, accessToken}) => {
const [userModels, setUserModels] = useState([]);
useEffect(() => {
const fetchUserModels = async () => {
try {
if (userID === null || userRole === null) {
return;
}
if (accessToken !== null) {
const model_available = await modelAvailableCall(accessToken, userID, userRole);
let available_model_names = model_available["data"].map(
(element: { id: string }) => element.id
);
console.log("available_model_names:", available_model_names);
setUserModels(available_model_names);
}
} catch (error) {
console.error("Error fetching user models:", error);
}
};
fetchUserModels();
}, [accessToken, userID, userRole]);
// logic to decide what models to display
let modelsToDisplay = [];
if (selectedTeam && selectedTeam.models) {
modelsToDisplay = selectedTeam.models;
}
// check if "all-proxy-models" is in modelsToDisplay
if (modelsToDisplay && modelsToDisplay.includes("all-proxy-models")) {
console.log("user models:", userModels);
modelsToDisplay = userModels;
}
return (
<>
<div className="mb-5">
<p className="text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">{selectedTeam?.team_alias}</p>
</div>
</>
)
}
export default ViewUserTeam;