Merge branch 'BerriAI:main' into feature/watsonx-integration

Simon S. Viloria 2024-04-21 10:35:51 +02:00 committed by GitHub
commit a77537ddd4
45 changed files with 1027 additions and 281 deletions

View file

@ -279,7 +279,7 @@ router_settings:
```
</TabItem>
<TabItem value="simple-shuffle" label="(Default) Weighted Pick">
<TabItem value="simple-shuffle" label="(Default) Weighted Pick (Async)">
**Default** Picks a deployment based on the provided **Requests per minute (rpm) or Tokens per minute (tpm)**
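A minimal sketch of the weighting, assuming two deployments with illustrative rpm values (not taken from any real config):

```python
# Illustrative sketch only: rpm-weighted random pick across deployments.
import random

deployments = [
    {"model_name": "gpt-3.5-turbo", "litellm_params": {"rpm": 900}},
    {"model_name": "gpt-3.5-turbo", "litellm_params": {"rpm": 100}},
]
rpms = [d["litellm_params"].get("rpm", 0) for d in deployments]
weights = [r / sum(rpms) for r in rpms]  # [0.9, 0.1]
picked = random.choices(deployments, weights=weights)[0]  # ~90% of picks hit the first deployment
```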

View file

@ -19,7 +19,7 @@ class PrometheusLogger:
**kwargs,
):
try:
verbose_logger.debug(f"in init prometheus metrics")
print(f"in init prometheus metrics")
from prometheus_client import Counter
self.litellm_llm_api_failed_requests_metric = Counter(

View file

@ -44,9 +44,18 @@ class PrometheusServicesLogger:
) # store the prometheus histogram/counter we need to call for each field in payload
for service in self.services:
histogram = self.create_histogram(service)
counter = self.create_counter(service)
self.payload_to_prometheus_map[service] = [histogram, counter]
histogram = self.create_histogram(service, type_of_request="latency")
counter_failed_request = self.create_counter(
service, type_of_request="failed_requests"
)
counter_total_requests = self.create_counter(
service, type_of_request="total_requests"
)
self.payload_to_prometheus_map[service] = [
histogram,
counter_failed_request,
counter_total_requests,
]
self.prometheus_to_amount_map: dict = (
{}
@ -74,26 +83,26 @@ class PrometheusServicesLogger:
return metric
return None
def create_histogram(self, label: str):
metric_name = "litellm_{}_latency".format(label)
def create_histogram(self, service: str, type_of_request: str):
metric_name = "litellm_{}_{}".format(service, type_of_request)
is_registered = self.is_metric_registered(metric_name)
if is_registered:
return self.get_metric(metric_name)
return self.Histogram(
metric_name,
"Latency for {} service".format(label),
labelnames=[label],
"Latency for {} service".format(service),
labelnames=[service],
)
def create_counter(self, label: str):
metric_name = "litellm_{}_failed_requests".format(label)
def create_counter(self, service: str, type_of_request: str):
metric_name = "litellm_{}_{}".format(service, type_of_request)
is_registered = self.is_metric_registered(metric_name)
if is_registered:
return self.get_metric(metric_name)
return self.Counter(
metric_name,
"Total failed requests for {} service".format(label),
labelnames=[label],
"Total {} for {} service".format(type_of_request, service),
labelnames=[service],
)
def observe_histogram(
@ -120,6 +129,8 @@ class PrometheusServicesLogger:
if self.mock_testing:
self.mock_testing_success_calls += 1
print(f"payload call type: {payload.call_type}")
if payload.service.value in self.payload_to_prometheus_map:
prom_objects = self.payload_to_prometheus_map[payload.service.value]
for obj in prom_objects:
@ -129,11 +140,19 @@ class PrometheusServicesLogger:
labels=payload.service.value,
amount=payload.duration,
)
elif isinstance(obj, self.Counter) and "total_requests" in obj._name:
self.increment_counter(
counter=obj,
labels=payload.service.value,
amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS
)
def service_failure_hook(self, payload: ServiceLoggerPayload):
if self.mock_testing:
self.mock_testing_failure_calls += 1
print(f"payload call type: {payload.call_type}")
if payload.service.value in self.payload_to_prometheus_map:
prom_objects = self.payload_to_prometheus_map[payload.service.value]
for obj in prom_objects:
@ -141,7 +160,7 @@ class PrometheusServicesLogger:
self.increment_counter(
counter=obj,
labels=payload.service.value,
amount=1, # LOG ERROR COUNT TO PROMETHEUS
amount=1, # LOG ERROR COUNT / TOTAL REQUESTS TO PROMETHEUS
)
async def async_service_success_hook(self, payload: ServiceLoggerPayload):
@ -151,6 +170,8 @@ class PrometheusServicesLogger:
if self.mock_testing:
self.mock_testing_success_calls += 1
print(f"payload call type: {payload.call_type}")
if payload.service.value in self.payload_to_prometheus_map:
prom_objects = self.payload_to_prometheus_map[payload.service.value]
for obj in prom_objects:
@ -160,12 +181,20 @@ class PrometheusServicesLogger:
labels=payload.service.value,
amount=payload.duration,
)
elif isinstance(obj, self.Counter) and "total_requests" in obj._name:
self.increment_counter(
counter=obj,
labels=payload.service.value,
amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS
)
async def async_service_failure_hook(self, payload: ServiceLoggerPayload):
print(f"received error payload: {payload.error}")
if self.mock_testing:
self.mock_testing_failure_calls += 1
print(f"payload call type: {payload.call_type}")
if payload.service.value in self.payload_to_prometheus_map:
prom_objects = self.payload_to_prometheus_map[payload.service.value]
for obj in prom_objects:
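For reference, a minimal sketch of the metric names the code above now registers per service; the service value "redis" is assumed for illustration:

```python
# Illustrative only: each service gets one latency histogram and two counters,
# named litellm_{service}_{type_of_request} per create_histogram / create_counter above.
service = "redis"  # assumed service value
for type_of_request in ("latency", "failed_requests", "total_requests"):
    print("litellm_{}_{}".format(service, type_of_request))
# litellm_redis_latency, litellm_redis_failed_requests, litellm_redis_total_requests
```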

View file

@ -507,10 +507,11 @@ def construct_tool_use_system_prompt(
): # from https://github.com/anthropics/anthropic-cookbook/blob/main/function_calling/function_calling.ipynb
tool_str_list = []
for tool in tools:
tool_function = get_attribute_or_key(tool, "function")
tool_str = construct_format_tool_for_claude_prompt(
tool["function"]["name"],
tool["function"].get("description", ""),
tool["function"].get("parameters", {}),
get_attribute_or_key(tool_function, "name"),
get_attribute_or_key(tool_function, "description", ""),
get_attribute_or_key(tool_function, "parameters", {}),
)
tool_str_list.append(tool_str)
tool_use_system_prompt = (
@ -634,7 +635,8 @@ def convert_to_anthropic_tool_result_xml(message: dict) -> str:
</function_results>
"""
name = message.get("name")
content = message.get("content")
content = message.get("content", "")
content = content.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
# We can't determine from openai message format whether it's a successful or
# error call result so default to the successful result template
@ -655,13 +657,15 @@ def convert_to_anthropic_tool_result_xml(message: dict) -> str:
def convert_to_anthropic_tool_invoke_xml(tool_calls: list) -> str:
invokes = ""
for tool in tool_calls:
if tool["type"] != "function":
if get_attribute_or_key(tool, "type") != "function":
continue
tool_name = tool["function"]["name"]
tool_function = get_attribute_or_key(tool, "function")
tool_name = get_attribute_or_key(tool_function, "name")
tool_arguments = get_attribute_or_key(tool_function, "arguments")
parameters = "".join(
f"<{param}>{val}</{param}>\n"
for param, val in json.loads(tool["function"]["arguments"]).items()
for param, val in json.loads(tool_arguments).items()
)
invokes += (
"<invoke>\n"
@ -715,7 +719,7 @@ def anthropic_messages_pt_xml(messages: list):
{
"type": "text",
"text": (
convert_to_anthropic_tool_result(messages[msg_i])
convert_to_anthropic_tool_result_xml(messages[msg_i])
if messages[msg_i]["role"] == "tool"
else messages[msg_i]["content"]
),
@ -736,7 +740,7 @@ def anthropic_messages_pt_xml(messages: list):
if messages[msg_i].get(
"tool_calls", []
): # support assistant tool invoke conversion
assistant_text += convert_to_anthropic_tool_invoke( # type: ignore
assistant_text += convert_to_anthropic_tool_invoke_xml( # type: ignore
messages[msg_i]["tool_calls"]
)
@ -848,12 +852,12 @@ def convert_to_anthropic_tool_invoke(tool_calls: list) -> list:
anthropic_tool_invoke = [
{
"type": "tool_use",
"id": tool["id"],
"name": tool["function"]["name"],
"input": json.loads(tool["function"]["arguments"]),
"id": get_attribute_or_key(tool, "id"),
"name": get_attribute_or_key(get_attribute_or_key(tool, "function"), "name"),
"input": json.loads(get_attribute_or_key(get_attribute_or_key(tool, "function"), "arguments")),
}
for tool in tool_calls
if tool["type"] == "function"
if get_attribute_or_key(tool, "type") == "function"
]
return anthropic_tool_invoke
@ -1074,7 +1078,8 @@ def cohere_message_pt(messages: list):
tool_result = convert_openai_message_to_cohere_tool_result(message)
tool_results.append(tool_result)
else:
prompt += message["content"]
prompt += message["content"] + "\n\n"
prompt = prompt.rstrip()
return prompt, tool_results
@ -1414,3 +1419,8 @@ def prompt_factory(
return default_pt(
messages=messages
) # default that covers Bloom, T-5, any non-chat tuned model (e.g. base Llama2)
def get_attribute_or_key(tool_or_function, attribute, default=None):
if hasattr(tool_or_function, attribute):
return getattr(tool_or_function, attribute)
return tool_or_function.get(attribute, default)
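A minimal usage sketch for the helper above, assuming dict-style and object-style tool calls (the object form is simulated with SimpleNamespace):

```python
# Illustrative only: the helper reads a field from either a dict or an attribute-style object.
from types import SimpleNamespace

dict_tool = {"type": "function", "function": {"name": "get_weather", "arguments": "{}"}}
obj_tool = SimpleNamespace(type="function", function=SimpleNamespace(name="get_weather", arguments="{}"))

assert get_attribute_or_key(dict_tool, "type") == "function"
assert get_attribute_or_key(obj_tool, "type") == "function"
assert get_attribute_or_key(dict_tool["function"], "description", "") == ""  # default when missing
```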

View file

@ -236,17 +236,19 @@ def completion(
if client is None:
if vertex_credentials is not None and isinstance(vertex_credentials, str):
import google.oauth2.service_account
json_obj = json.loads(vertex_credentials)
creds = (
google.oauth2.service_account.Credentials.from_service_account_info(
json_obj,
json.loads(vertex_credentials),
scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
)
### CHECK IF ACCESS
access_token = refresh_auth(credentials=creds)
else:
import google.auth
creds, _ = google.auth.default()
### CHECK IF ACCESS
access_token = refresh_auth(credentials=creds)
vertex_ai_client = AnthropicVertex(
project_id=vertex_project,

View file

@ -610,6 +610,7 @@ def completion(
"client",
"rpm",
"tpm",
"max_parallel_requests",
"input_cost_per_token",
"output_cost_per_token",
"input_cost_per_second",
@ -2598,6 +2599,7 @@ def embedding(
client = kwargs.pop("client", None)
rpm = kwargs.pop("rpm", None)
tpm = kwargs.pop("tpm", None)
max_parallel_requests = kwargs.pop("max_parallel_requests", None)
model_info = kwargs.get("model_info", None)
metadata = kwargs.get("metadata", None)
encoding_format = kwargs.get("encoding_format", None)
@ -2655,6 +2657,7 @@ def embedding(
"client",
"rpm",
"tpm",
"max_parallel_requests",
"input_cost_per_token",
"output_cost_per_token",
"input_cost_per_second",
@ -3514,6 +3517,7 @@ def image_generation(
"client",
"rpm",
"tpm",
"max_parallel_requests",
"input_cost_per_token",
"output_cost_per_token",
"hf_model_name",

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[16586,[\"968\",\"static/chunks/968-0cc23fee51b47e4e.js\",\"931\",\"static/chunks/app/page-93ac11fb17dce9d6.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"Oe7aA-U7OV9Y13gspREJQ\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[65249,[\"968\",\"static/chunks/968-0cc23fee51b47e4e.js\",\"931\",\"static/chunks/app/page-6ba29bc4256320f4.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"Vjlnu8AomhCFg4fkGtcUs\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[16586,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-93ac11fb17dce9d6.js"],""]
3:I[65249,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-6ba29bc4256320f4.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["Oe7aA-U7OV9Y13gspREJQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["Vjlnu8AomhCFg4fkGtcUs",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -4,14 +4,12 @@ model_list:
model: openai/my-fake-model
api_key: my-fake-key
api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
# api_base: http://0.0.0.0:8080
stream_timeout: 0.001
- model_name: fake-openai-endpoint
litellm_params:
model: openai/my-fake-model-2
api_key: my-fake-key
api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
# api_base: http://0.0.0.0:8080
stream_timeout: 0.001
- litellm_params:
model: azure/chatgpt-v-2
@ -30,13 +28,6 @@ model_list:
# api_key: my-fake-key
# api_base: https://exampleopenaiendpoint-production.up.railway.app/
# litellm_settings:
# success_callback: ["prometheus"]
# failure_callback: ["prometheus"]
# service_callback: ["prometheus_system"]
# upperbound_key_generate_params:
# max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET
router_settings:
routing_strategy: usage-based-routing-v2
# redis_url: "os.environ/REDIS_URL"
@ -48,6 +39,10 @@ router_settings:
litellm_settings:
num_retries: 3 # retry call 3 times on each model_name
allowed_fails: 3 # cooldown model if it fails > 1 call in a minute.
success_callback: ["prometheus"]
failure_callback: ["prometheus"]
service_callback: ["prometheus_system"]
general_settings:
alerting: ["slack"]

View file

@ -87,6 +87,14 @@ class LiteLLMRoutes(enum.Enum):
"/v2/key/info",
]
sso_only_routes: List = [
"/key/generate",
"/key/update",
"/key/delete",
"/global/spend/logs",
"/global/predict/spend/logs",
]
management_routes: List = [ # key
"/key/generate",
"/key/update",

View file

@ -1053,6 +1053,11 @@ async def user_api_key_auth(
status_code=status.HTTP_403_FORBIDDEN,
detail="key not allowed to access this team's info",
)
elif (
_has_user_setup_sso()
and route in LiteLLMRoutes.sso_only_routes.value
):
pass
else:
raise Exception(
f"Only master key can be used to generate, delete, update info for new keys/users/teams. Route={route}"
@ -1102,6 +1107,13 @@ async def user_api_key_auth(
return UserAPIKeyAuth(
api_key=api_key, user_role="proxy_admin", **valid_token_dict
)
elif (
_has_user_setup_sso()
and route in LiteLLMRoutes.sso_only_routes.value
):
return UserAPIKeyAuth(
api_key=api_key, user_role="app_owner", **valid_token_dict
)
else:
raise Exception(
f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"
@ -5721,6 +5733,20 @@ async def new_user(data: NewUserRequest):
"user" # only create a user, don't create key if 'auto_create_key' set to False
)
response = await generate_key_helper_fn(**data_json)
# Admin UI Logic
# if team_id passed add this user to the team
if data_json.get("team_id", None) is not None:
await team_member_add(
data=TeamMemberAddRequest(
team_id=data_json.get("team_id", None),
member=Member(
user_id=data_json.get("user_id", None),
role="user",
user_email=data_json.get("user_email", None),
),
)
)
return NewUserResponse(
key=response.get("token", ""),
expires=response.get("expires", None),
@ -6526,13 +6552,20 @@ async def team_member_add(
existing_team_row = await prisma_client.get_data( # type: ignore
team_id=data.team_id, table_name="team", query_type="find_unique"
)
if existing_team_row is None:
raise HTTPException(
status_code=404,
detail={
"error": f"Team not found for team_id={getattr(data, 'team_id', None)}"
},
)
new_member = data.member
existing_team_row.members_with_roles.append(new_member)
complete_team_data = LiteLLM_TeamTable(
**existing_team_row.model_dump(),
**_get_pydantic_json_dict(existing_team_row),
)
team_row = await prisma_client.update_data(
@ -8120,7 +8153,6 @@ async def auth_callback(request: Request):
}
user_role = getattr(user_info, "user_role", None)
else:
## check if user-email in db ##
user_info = await prisma_client.db.litellm_usertable.find_first(
where={"user_email": user_email}
@ -8142,9 +8174,7 @@ async def auth_callback(request: Request):
litellm.default_user_params, dict
):
user_defined_values = {
"models": litellm.default_user_params.get(
"models", user_id_models
),
"models": litellm.default_user_params.get("models", user_id_models),
"user_id": litellm.default_user_params.get("user_id", user_id),
"user_email": litellm.default_user_params.get(
"user_email", user_email

View file

@ -238,7 +238,10 @@ class ProxyLogging:
litellm_params = kwargs.get("litellm_params", {})
model = kwargs.get("model", "")
api_base = litellm.get_api_base(model=model, optional_params=litellm_params)
messages = kwargs.get("messages", "")
messages = kwargs.get("messages", None)
# if messages does not exist fallback to "input"
if messages is None:
messages = kwargs.get("input", None)
# only use first 100 chars for alerting
_messages = str(messages)[:100]
@ -282,7 +285,10 @@ class ProxyLogging:
):
if request_data is not None:
model = request_data.get("model", "")
messages = request_data.get("messages", "")
messages = request_data.get("messages", None)
if messages is None:
# if messages does not exist fallback to "input"
messages = request_data.get("input", None)
trace_id = request_data.get("metadata", {}).get(
"trace_id", None
) # get langfuse trace id
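A minimal sketch of the fallback above, assuming an embedding-style request body that carries "input" but no "messages":

```python
# Illustrative only: alerting falls back to "input" when "messages" is absent.
request_data = {"model": "text-embedding-ada-002", "input": ["hello world"]}  # assumed payload
messages = request_data.get("messages", None)
if messages is None:
    # if messages does not exist fallback to "input"
    messages = request_data.get("input", None)
_messages = str(messages)[:100]  # only the first 100 chars are used for the alert
```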

View file

@ -26,7 +26,12 @@ from litellm.llms.custom_httpx.azure_dall_e_2 import (
CustomHTTPTransport,
AsyncCustomHTTPTransport,
)
from litellm.utils import ModelResponse, CustomStreamWrapper, get_utc_datetime
from litellm.utils import (
ModelResponse,
CustomStreamWrapper,
get_utc_datetime,
calculate_max_parallel_requests,
)
import copy
from litellm._logging import verbose_router_logger
import logging
@ -61,6 +66,7 @@ class Router:
num_retries: int = 0,
timeout: Optional[float] = None,
default_litellm_params={}, # default params for Router.chat.completion.create
default_max_parallel_requests: Optional[int] = None,
set_verbose: bool = False,
debug_level: Literal["DEBUG", "INFO"] = "INFO",
fallbacks: List = [],
@ -198,6 +204,7 @@ class Router:
) # use a dual cache (Redis+In-Memory) for tracking cooldowns, usage, etc.
self.default_deployment = None # use this to track the users default deployment, when they want to use model = *
self.default_max_parallel_requests = default_max_parallel_requests
if model_list:
model_list = copy.deepcopy(model_list)
@ -213,6 +220,7 @@ class Router:
) # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown
self.num_retries = num_retries or litellm.num_retries or 0
self.timeout = timeout or litellm.request_timeout
self.retry_after = retry_after
self.routing_strategy = routing_strategy
self.fallbacks = fallbacks or litellm.fallbacks
@ -298,7 +306,7 @@ class Router:
else:
litellm.failure_callback = [self.deployment_callback_on_failure]
verbose_router_logger.info(
f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}"
f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter Redis Caching={self.cache.redis_cache}"
)
self.routing_strategy_args = routing_strategy_args
@ -496,7 +504,9 @@ class Router:
)
rpm_semaphore = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="rpm_client"
deployment=deployment,
kwargs=kwargs,
client_type="max_parallel_requests",
)
if rpm_semaphore is not None and isinstance(
@ -681,7 +691,9 @@ class Router:
### CONCURRENCY-SAFE RPM CHECKS ###
rpm_semaphore = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="rpm_client"
deployment=deployment,
kwargs=kwargs,
client_type="max_parallel_requests",
)
if rpm_semaphore is not None and isinstance(
@ -803,7 +815,9 @@ class Router:
### CONCURRENCY-SAFE RPM CHECKS ###
rpm_semaphore = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="rpm_client"
deployment=deployment,
kwargs=kwargs,
client_type="max_parallel_requests",
)
if rpm_semaphore is not None and isinstance(
@ -1049,7 +1063,9 @@ class Router:
)
rpm_semaphore = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="rpm_client"
deployment=deployment,
kwargs=kwargs,
client_type="max_parallel_requests",
)
if rpm_semaphore is not None and isinstance(
@ -1243,7 +1259,9 @@ class Router:
### CONCURRENCY-SAFE RPM CHECKS ###
rpm_semaphore = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="rpm_client"
deployment=deployment,
kwargs=kwargs,
client_type="max_parallel_requests",
)
if rpm_semaphore is not None and isinstance(
@ -1862,17 +1880,23 @@ class Router:
model_id = model["model_info"]["id"]
# ### IF RPM SET - initialize a semaphore ###
rpm = litellm_params.get("rpm", None)
if rpm:
semaphore = asyncio.Semaphore(rpm)
cache_key = f"{model_id}_rpm_client"
tpm = litellm_params.get("tpm", None)
max_parallel_requests = litellm_params.get("max_parallel_requests", None)
calculated_max_parallel_requests = calculate_max_parallel_requests(
rpm=rpm,
max_parallel_requests=max_parallel_requests,
tpm=tpm,
default_max_parallel_requests=self.default_max_parallel_requests,
)
if calculated_max_parallel_requests:
semaphore = asyncio.Semaphore(calculated_max_parallel_requests)
cache_key = f"{model_id}_max_parallel_requests_client"
self.cache.set_cache(
key=cache_key,
value=semaphore,
local_only=True,
)
# print("STORES SEMAPHORE IN CACHE")
#### for OpenAI / Azure we need to initialize the Client for High Traffic ########
custom_llm_provider = litellm_params.get("custom_llm_provider")
custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""
@ -2537,8 +2561,8 @@ class Router:
The appropriate client based on the given client_type and kwargs.
"""
model_id = deployment["model_info"]["id"]
if client_type == "rpm_client":
cache_key = "{}_rpm_client".format(model_id)
if client_type == "max_parallel_requests":
cache_key = "{}_max_parallel_requests_client".format(model_id)
client = self.cache.get_cache(key=cache_key, local_only=True)
return client
elif client_type == "async":
@ -2778,6 +2802,7 @@ class Router:
"""
if (
self.routing_strategy != "usage-based-routing-v2"
and self.routing_strategy != "simple-shuffle"
): # prevent regressions for other routing strategies, that don't have async get available deployments implemented.
return self.get_available_deployment(
model=model,
@ -2828,6 +2853,25 @@ class Router:
messages=messages,
input=input,
)
elif self.routing_strategy == "simple-shuffle":
# if users pass rpm or tpm, we do a random weighted pick - based on rpm/tpm
############## Check if we can do a RPM/TPM based weighted pick #################
rpm = healthy_deployments[0].get("litellm_params").get("rpm", None)
if rpm is not None:
# use weight-random pick if rpms provided
rpms = [m["litellm_params"].get("rpm", 0) for m in healthy_deployments]
verbose_router_logger.debug(f"\nrpms {rpms}")
total_rpm = sum(rpms)
weights = [rpm / total_rpm for rpm in rpms]
verbose_router_logger.debug(f"\n weights {weights}")
# Perform weighted random pick
selected_index = random.choices(range(len(rpms)), weights=weights)[0]
verbose_router_logger.debug(f"\n selected index, {selected_index}")
deployment = healthy_deployments[selected_index]
verbose_router_logger.info(
f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}"
)
return deployment or deployment[0]
if deployment is None:
verbose_router_logger.info(

View file

@ -407,13 +407,15 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
tpm_keys.append(tpm_key)
rpm_keys.append(rpm_key)
tpm_values = await self.router_cache.async_batch_get_cache(
keys=tpm_keys
) # [1, 2, None, ..]
rpm_values = await self.router_cache.async_batch_get_cache(
keys=rpm_keys
combined_tpm_rpm_keys = tpm_keys + rpm_keys
combined_tpm_rpm_values = await self.router_cache.async_batch_get_cache(
keys=combined_tpm_rpm_keys
) # [1, 2, None, ..]
tpm_values = combined_tpm_rpm_values[: len(tpm_keys)]
rpm_values = combined_tpm_rpm_values[len(tpm_keys) :]
return self._common_checks_available_deployment(
model_group=model_group,
healthy_deployments=healthy_deployments,

View file

@ -269,6 +269,30 @@ def test_bedrock_claude_3_tool_calling():
assert isinstance(
response.choices[0].message.tool_calls[0].function.arguments, str
)
messages.append(
response.choices[0].message.model_dump()
) # Add assistant tool invokes
tool_result = (
'{"location": "Boston", "temperature": "72", "unit": "fahrenheit"}'
)
# Add user submitted tool results in the OpenAI format
messages.append(
{
"tool_call_id": response.choices[0].message.tool_calls[0].id,
"role": "tool",
"name": response.choices[0].message.tool_calls[0].function.name,
"content": tool_result,
}
)
# In the second response, Claude should deduce answer from tool results
second_response = completion(
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
messages=messages,
tools=tools,
tool_choice="auto",
)
print(f"second response: {second_response}")
assert isinstance(second_response.choices[0].message.content, str)
except RateLimitError:
pass
except Exception as e:

View file

@ -120,6 +120,15 @@ async def test_new_user_response(prisma_client):
await litellm.proxy.proxy_server.prisma_client.connect()
from litellm.proxy.proxy_server import user_api_key_cache
await new_team(
NewTeamRequest(
team_id="ishaan-special-team",
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
),
)
_response = await new_user(
data=NewUserRequest(
models=["azure-gpt-3.5"],
@ -999,10 +1008,32 @@ def test_generate_and_update_key(prisma_client):
async def test():
await litellm.proxy.proxy_server.prisma_client.connect()
# create team "litellm-core-infra@gmail.com"
print("creating team litellm-core-infra@gmail.com")
await new_team(
NewTeamRequest(
team_id="litellm-core-infra@gmail.com",
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
),
)
await new_team(
NewTeamRequest(
team_id="ishaan-special-team",
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
),
)
request = NewUserRequest(
metadata={"team": "litellm-team3", "project": "litellm-project3"},
metadata={"project": "litellm-project3"},
team_id="litellm-core-infra@gmail.com",
)
key = await new_user(request)
print(key)
@ -1015,7 +1046,6 @@ def test_generate_and_update_key(prisma_client):
print("\n info for key=", result["info"])
assert result["info"]["max_parallel_requests"] == None
assert result["info"]["metadata"] == {
"team": "litellm-team3",
"project": "litellm-project3",
}
assert result["info"]["team_id"] == "litellm-core-infra@gmail.com"
@ -1037,7 +1067,7 @@ def test_generate_and_update_key(prisma_client):
# update the team id
response2 = await update_key_fn(
request=Request,
data=UpdateKeyRequest(key=generated_key, team_id="ishaan"),
data=UpdateKeyRequest(key=generated_key, team_id="ishaan-special-team"),
)
print("response2=", response2)
@ -1048,11 +1078,10 @@ def test_generate_and_update_key(prisma_client):
print("\n info for key=", result["info"])
assert result["info"]["max_parallel_requests"] == None
assert result["info"]["metadata"] == {
"team": "litellm-team3",
"project": "litellm-project3",
}
assert result["info"]["models"] == ["ada", "babbage", "curie", "davinci"]
assert result["info"]["team_id"] == "ishaan"
assert result["info"]["team_id"] == "ishaan-special-team"
# cleanup - delete key
delete_key_request = KeyRequest(keys=[generated_key])
@ -1941,6 +1970,15 @@ async def test_master_key_hashing(prisma_client):
await litellm.proxy.proxy_server.prisma_client.connect()
from litellm.proxy.proxy_server import user_api_key_cache
await new_team(
NewTeamRequest(
team_id="ishaans-special-team",
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
),
)
_response = await new_user(
data=NewUserRequest(
models=["azure-gpt-3.5"],

View file

@ -81,7 +81,7 @@ def test_async_fallbacks(caplog):
# Define the expected log messages
# - error request, falling back notice, success notice
expected_logs = [
"Intialized router with Routing strategy: simple-shuffle\n\nRouting fallbacks: [{'gpt-3.5-turbo': ['azure/gpt-3.5-turbo']}]\n\nRouting context window fallbacks: None",
"Intialized router with Routing strategy: simple-shuffle\n\nRouting fallbacks: [{'gpt-3.5-turbo': ['azure/gpt-3.5-turbo']}]\n\nRouting context window fallbacks: None\n\nRouter Redis Caching=None",
"litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m",
"Falling back to model_group = azure/gpt-3.5-turbo",
"litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",

View file

@ -0,0 +1,115 @@
# What is this?
## Unit tests for the max_parallel_requests feature on Router
import sys, os, time, inspect, asyncio, traceback
from datetime import datetime
import pytest
sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.utils import calculate_max_parallel_requests
from typing import Optional
"""
- only rpm
- only tpm
- only max_parallel_requests
- max_parallel_requests + rpm
- max_parallel_requests + tpm
- max_parallel_requests + tpm + rpm
"""
max_parallel_requests_values = [None, 10]
tpm_values = [None, 20, 300000]
rpm_values = [None, 30]
default_max_parallel_requests = [None, 40]
@pytest.mark.parametrize(
"max_parallel_requests, tpm, rpm, default_max_parallel_requests",
[
(mp, tp, rp, dmp)
for mp in max_parallel_requests_values
for tp in tpm_values
for rp in rpm_values
for dmp in default_max_parallel_requests
],
)
def test_scenario(max_parallel_requests, tpm, rpm, default_max_parallel_requests):
calculated_max_parallel_requests = calculate_max_parallel_requests(
max_parallel_requests=max_parallel_requests,
rpm=rpm,
tpm=tpm,
default_max_parallel_requests=default_max_parallel_requests,
)
if max_parallel_requests is not None:
assert max_parallel_requests == calculated_max_parallel_requests
elif rpm is not None:
assert rpm == calculated_max_parallel_requests
elif tpm is not None:
calculated_rpm = int(tpm / 1000 / 6)
if calculated_rpm == 0:
calculated_rpm = 1
print(
f"test calculated_rpm: {calculated_rpm}, calculated_max_parallel_requests={calculated_max_parallel_requests}"
)
assert calculated_rpm == calculated_max_parallel_requests
elif default_max_parallel_requests is not None:
assert calculated_max_parallel_requests == default_max_parallel_requests
else:
assert calculated_max_parallel_requests is None
@pytest.mark.parametrize(
"max_parallel_requests, tpm, rpm, default_max_parallel_requests",
[
(mp, tp, rp, dmp)
for mp in max_parallel_requests_values
for tp in tpm_values
for rp in rpm_values
for dmp in default_max_parallel_requests
],
)
def test_setting_mpr_limits_per_model(
max_parallel_requests, tpm, rpm, default_max_parallel_requests
):
deployment = {
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo",
"max_parallel_requests": max_parallel_requests,
"tpm": tpm,
"rpm": rpm,
},
"model_info": {"id": "my-unique-id"},
}
router = litellm.Router(
model_list=[deployment],
default_max_parallel_requests=default_max_parallel_requests,
)
mpr_client: Optional[asyncio.Semaphore] = router._get_client(
deployment=deployment,
kwargs={},
client_type="max_parallel_requests",
)
if max_parallel_requests is not None:
assert max_parallel_requests == mpr_client._value
elif rpm is not None:
assert rpm == mpr_client._value
elif tpm is not None:
calculated_rpm = int(tpm / 1000 / 6)
if calculated_rpm == 0:
calculated_rpm = 1
print(
f"test calculated_rpm: {calculated_rpm}, calculated_max_parallel_requests={mpr_client._value}"
)
assert calculated_rpm == mpr_client._value
elif default_max_parallel_requests is not None:
assert mpr_client._value == default_max_parallel_requests
else:
assert mpr_client is None
# raise Exception("it worked!")

View file

@ -5434,6 +5434,49 @@ def get_optional_params(
return optional_params
def calculate_max_parallel_requests(
max_parallel_requests: Optional[int],
rpm: Optional[int],
tpm: Optional[int],
default_max_parallel_requests: Optional[int],
) -> Optional[int]:
"""
Returns the max parallel requests to send to a deployment.
Used in semaphore for async requests on router.
Parameters:
- max_parallel_requests - Optional[int] - max_parallel_requests allowed for that deployment
- rpm - Optional[int] - requests per minute allowed for that deployment
- tpm - Optional[int] - tokens per minute allowed for that deployment
- default_max_parallel_requests - Optional[int] - default_max_parallel_requests allowed for any deployment
Returns:
- int or None (if all params are None)
Order:
max_parallel_requests > rpm > tpm / 6 (azure formula) > default max_parallel_requests
Azure RPM formula:
6 rpm per 1000 TPM
https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits
"""
if max_parallel_requests is not None:
return max_parallel_requests
elif rpm is not None:
return rpm
elif tpm is not None:
calculated_rpm = int(tpm / 1000 / 6)
if calculated_rpm == 0:
calculated_rpm = 1
return calculated_rpm
elif default_max_parallel_requests is not None:
return default_max_parallel_requests
return None
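A minimal sketch of the precedence implemented by calculate_max_parallel_requests above (input values are assumed):

```python
# Illustrative only: max_parallel_requests > rpm > tpm-derived value > default.
from litellm.utils import calculate_max_parallel_requests

assert calculate_max_parallel_requests(10, 30, 300000, 40) == 10     # explicit max_parallel_requests wins
assert calculate_max_parallel_requests(None, 30, 300000, 40) == 30   # falls back to rpm
assert calculate_max_parallel_requests(None, None, 300000, 40) == 50  # int(300000 / 1000 / 6)
assert calculate_max_parallel_requests(None, None, None, 40) == 40    # default_max_parallel_requests
```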
def get_api_base(model: str, optional_params: dict) -> Optional[str]:
"""
Returns the api base used for calling the model.

View file

@ -96,9 +96,9 @@ litellm_settings:
router_settings:
routing_strategy: usage-based-routing-v2
redis_host: os.environ/REDIS_HOST
redis_password: os.environ/REDIS_PASSWORD
redis_port: os.environ/REDIS_PORT
# redis_host: os.environ/REDIS_HOST
# redis_password: os.environ/REDIS_PASSWORD
# redis_port: os.environ/REDIS_PORT
enable_pre_call_checks: true
general_settings:

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.35.17"
version = "1.35.18"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.35.17"
version = "1.35.18"
version_files = [
"pyproject.toml:^version"
]

View file

@ -14,6 +14,24 @@ sys.path.insert(
import litellm
async def generate_team(session):
url = "http://0.0.0.0:4000/team/new"
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
data = {
"team_id": "litellm-dashboard",
}
async with session.post(url, headers=headers, json=data) as response:
status = response.status
response_text = await response.text()
print(f"Response (Status code: {status}):")
print(response_text)
print()
_json_response = await response.json()
return _json_response
async def generate_user(
session,
user_role="app_owner",
@ -668,7 +686,7 @@ async def test_key_rate_limit():
@pytest.mark.asyncio
async def test_key_delete():
async def test_key_delete_ui():
"""
Admin UI flow - DO NOT DELETE
-> Create a key with user_id = "ishaan"
@ -680,6 +698,8 @@ async def test_key_delete():
key = key_gen["key"]
# generate a admin UI key
team = await generate_team(session=session)
print("generated team: ", team)
admin_ui_key = await generate_user(session=session, user_role="proxy_admin")
print(
"trying to delete key=",

View file

@ -260,7 +260,10 @@ async def test_chat_completion_ratelimit():
await asyncio.gather(*tasks)
pytest.fail("Expected at least 1 call to fail")
except Exception as e:
if "Request did not return a 200 status code: 429" in str(e):
pass
else:
pytest.fail(f"Wrong error received - {str(e)}")
@pytest.mark.asyncio

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[16586,[\"968\",\"static/chunks/968-0cc23fee51b47e4e.js\",\"931\",\"static/chunks/app/page-93ac11fb17dce9d6.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"Oe7aA-U7OV9Y13gspREJQ\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[65249,[\"968\",\"static/chunks/968-0cc23fee51b47e4e.js\",\"931\",\"static/chunks/app/page-6ba29bc4256320f4.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"Vjlnu8AomhCFg4fkGtcUs\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[16586,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-93ac11fb17dce9d6.js"],""]
3:I[65249,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-6ba29bc4256320f4.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["Oe7aA-U7OV9Y13gspREJQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["Vjlnu8AomhCFg4fkGtcUs",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@@ -9,6 +9,7 @@ import Teams from "@/components/teams";
import AdminPanel from "@/components/admins";
import Settings from "@/components/settings";
import GeneralSettings from "@/components/general_settings";
import APIRef from "@/components/api_ref";
import ChatUI from "@/components/chat_ui";
import Sidebar from "../components/leftnav";
import Usage from "../components/usage";
@@ -165,6 +166,8 @@ const CreateKeyPage = () => {
accessToken={accessToken}
showSSOBanner={showSSOBanner}
/>
) : page == "api_ref" ? (
<APIRef/>
) : page == "settings" ? (
<Settings
userID={userID}
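The new page is rendered when the sidebar sets `page === "api_ref"`, extending the existing ternary chain in page.tsx. A minimal sketch of how the pieces connect is below; the `page`/`setPage` names and the `APIRef` import match this diff, while the reduced component itself is illustrative and not part of the commit:

```tsx
// Sketch only: the sidebar's Menu.Item (see leftnav.tsx below) calls
// setPage("api_ref"), and the ternary chain in page.tsx renders <APIRef/>
// for that value. The standalone component here is illustrative.
import React, { useState } from "react";
import APIRef from "@/components/api_ref";

const Example = () => {
  const [page, setPage] = useState("keys"); // initial value is illustrative
  return (
    <>
      <button onClick={() => setPage("api_ref")}>API Reference</button>
      {page == "api_ref" ? <APIRef /> : null}
    </>
  );
};
```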

View file

@@ -0,0 +1,152 @@
"use client";
import React, { useEffect, useState } from "react";
import {
Badge,
Card,
Table,
Metric,
TableBody,
TableCell,
TableHead,
TableHeaderCell,
TableRow,
Text,
Title,
Icon,
Accordion,
AccordionBody,
AccordionHeader,
List,
ListItem,
Tab,
TabGroup,
TabList,
TabPanel,
TabPanels,
Grid,
} from "@tremor/react";
import { Statistic } from "antd"
import { modelAvailableCall } from "./networking";
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
const APIRef = ({}) => {
return (
<>
<Grid className="gap-2 p-8 h-[80vh] w-full mt-2">
<div className="mb-5">
<p className="text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">OpenAI Compatible Proxy: API Reference</p>
<Text className="mt-2 mb-2">LiteLLM is OpenAI Compatible. This means your API Key works with the OpenAI SDK. Just replace the base_url to point to your litellm proxy. Example Below </Text>
<TabGroup>
<TabList>
<Tab>OpenAI Python SDK</Tab>
<Tab>LlamaIndex</Tab>
<Tab>Langchain Py</Tab>
</TabList>
<TabPanels>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import openai
client = openai.OpenAI(
api_key="your_api_key",
base_url="http://0.0.0.0:4000" # LiteLLM Proxy is OpenAI compatible, Read More: https://docs.litellm.ai/docs/proxy/user_keys
)
response = client.chat.completions.create(
model="gpt-3.5-turbo", # model to send to the proxy
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
]
)
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import os, dotenv
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
llm = AzureOpenAI(
engine="azure-gpt-3.5", # model_name on litellm proxy
temperature=0.0,
azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint
api_key="sk-1234", # litellm proxy API Key
api_version="2023-07-01-preview",
)
embed_model = AzureOpenAIEmbedding(
deployment_name="azure-embedding-model",
azure_endpoint="http://0.0.0.0:4000",
api_key="sk-1234",
api_version="2023-07-01-preview",
)
documents = SimpleDirectoryReader("llama_index_data").load_data()
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
chat = ChatOpenAI(
openai_api_base="http://0.0.0.0:4000",
model = "gpt-3.5-turbo",
temperature=0.1
)
messages = [
SystemMessage(
content="You are a helpful assistant that im using to make a test request to."
),
HumanMessage(
content="test from litellm. tell me why it's amazing in 1 sentence"
),
]
response = chat(messages)
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
</TabPanels>
</TabGroup>
</div>
</Grid>
</>
)
}
export default APIRef;
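The tabs above cover Python clients only. For completeness, a minimal TypeScript equivalent using the `openai` npm package against the same proxy; the endpoint, key, and model values mirror the examples above, and the snippet is illustrative rather than part of this diff:

```typescript
// Illustrative only: calling the LiteLLM proxy from TypeScript with the
// official openai SDK (npm install openai). Values match the tabs above.
import OpenAI from "openai";

const client = new OpenAI({
  apiKey: "sk-1234",              // litellm proxy API key
  baseURL: "http://0.0.0.0:4000", // litellm proxy endpoint
});

async function main() {
  const response = await client.chat.completions.create({
    model: "gpt-3.5-turbo", // model name configured on the proxy
    messages: [
      { role: "user", content: "this is a test request, write a short poem" },
    ],
  });
  console.log(response.choices[0]?.message?.content);
}

main().catch(console.error);
```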

View file

@@ -13,12 +13,12 @@ import {
TabGroup,
TabList,
TabPanel,
TabPanels,
Metric,
Col,
Text,
SelectItem,
TextInput,
TabPanels,
Button,
} from "@tremor/react";
@@ -201,7 +201,6 @@ const ChatUI: React.FC<ChatUIProps> = ({
<TabGroup>
<TabList>
<Tab>Chat</Tab>
<Tab>API Reference</Tab>
</TabList>
<TabPanels>
@@ -272,124 +271,7 @@ const ChatUI: React.FC<ChatUIProps> = ({
</div>
</div>
</TabPanel>
<TabPanel>
<TabGroup>
<TabList>
<Tab>OpenAI Python SDK</Tab>
<Tab>LlamaIndex</Tab>
<Tab>Langchain Py</Tab>
</TabList>
<TabPanels>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import openai
client = openai.OpenAI(
api_key="your_api_key",
base_url="http://0.0.0.0:4000" # proxy base url
)
response = client.chat.completions.create(
model="gpt-3.5-turbo", # model to use from Models Tab
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
extra_body={
"metadata": {
"generation_name": "ishaan-generation-openai-client",
"generation_id": "openai-client-gen-id22",
"trace_id": "openai-client-trace-id22",
"trace_user_id": "openai-client-user-id2"
}
}
)
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import os, dotenv
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
llm = AzureOpenAI(
engine="azure-gpt-3.5", # model_name on litellm proxy
temperature=0.0,
azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint
api_key="sk-1234", # litellm proxy API Key
api_version="2023-07-01-preview",
)
embed_model = AzureOpenAIEmbedding(
deployment_name="azure-embedding-model",
azure_endpoint="http://0.0.0.0:4000",
api_key="sk-1234",
api_version="2023-07-01-preview",
)
documents = SimpleDirectoryReader("llama_index_data").load_data()
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
chat = ChatOpenAI(
openai_api_base="http://0.0.0.0:8000",
model = "gpt-3.5-turbo",
temperature=0.1,
extra_body={
"metadata": {
"generation_name": "ishaan-generation-langchain-client",
"generation_id": "langchain-client-gen-id22",
"trace_id": "langchain-client-trace-id22",
"trace_user_id": "langchain-client-user-id2"
}
}
)
messages = [
SystemMessage(
content="You are a helpful assistant that im using to make a test request to."
),
HumanMessage(
content="test from litellm. tell me why it's amazing in 1 sentence"
),
]
response = chat(messages)
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
</TabPanels>
</TabGroup>
</TabPanel>
</TabPanels>
</TabGroup>
</Card>

View file

@@ -2,7 +2,7 @@
import React, { useState, useEffect, useRef } from "react";
import { Button, TextInput, Grid, Col } from "@tremor/react";
import { Card, Metric, Text, Title, Subtitle } from "@tremor/react";
import { Card, Metric, Text, Title, Subtitle, Accordion, AccordionHeader, AccordionBody, } from "@tremor/react";
import { CopyToClipboard } from 'react-copy-to-clipboard';
import {
Button as Button2,
@@ -147,6 +147,17 @@ const CreateKey: React.FC<CreateKeyProps> = ({
mode="multiple"
placeholder="Select models"
style={{ width: "100%" }}
onChange={(values) => {
// Check if "All Team Models" is selected
const isAllTeamModelsSelected = values.includes("all-team-models");
// If "All Team Models" is selected, deselect all other models
if (isAllTeamModelsSelected) {
const newValues = ["all-team-models"];
// You can call the form's setFieldsValue method to update the value
form.setFieldsValue({ models: newValues });
}
}}
>
<Option key="all-team-models" value="all-team-models">
All Team Models
@@ -248,16 +259,153 @@ const CreateKey: React.FC<CreateKeyProps> = ({
</>
) : (
<>
<Form.Item label="Key Name" name="key_alias">
<Form.Item
label="Key Name"
name="key_alias"
rules={[{ required: true, message: 'Please input a key name' }]}
help="required"
>
<Input />
</Form.Item>
<Form.Item label="Team ID (Contact Group)" name="team_id">
<Input placeholder="default team (create a new team)" />
<Form.Item
label="Team ID"
name="team_id"
hidden={true}
initialValue={team ? team["team_id"] : null}
valuePropName="team_id"
className="mt-8"
>
<Input value={team ? team["team_alias"] : ""} disabled />
</Form.Item>
<Form.Item label="Description" name="description">
<Input.TextArea placeholder="Enter description" rows={4} />
<Form.Item
label="Models"
name="models"
className="mb-12"
rules={[{ required: true, message: 'Please select a model' }]}
help="required"
>
<Select
mode="multiple"
placeholder="Select models"
style={{ width: "100%" }}
onChange={(values) => {
const isAllTeamModelsSelected = values.includes("all-team-models");
if (isAllTeamModelsSelected) {
const newValues = ["all-team-models"];
form.setFieldsValue({ models: newValues });
}
}}
>
<Option key="all-team-models" value="all-team-models">
All Team Models
</Option>
{team && team.models ? (
team.models.includes("all-proxy-models") ? (
userModels.map((model: string) => (
(
<Option key={model} value={model}>
{model}
</Option>
)
))
) : (
team.models.map((model: string) => (
<Option key={model} value={model}>
{model}
</Option>
))
)
) : (
userModels.map((model: string) => (
<Option key={model} value={model}>
{model}
</Option>
))
)}
</Select>
</Form.Item>
<Accordion className="mt-20 mb-8" >
<AccordionHeader>
<b>Optional Settings</b>
</AccordionHeader>
<AccordionBody>
<Form.Item
className="mt-8"
label="Max Budget (USD)"
name="max_budget"
help={`Budget cannot exceed team max budget: $${team?.max_budget !== null && team?.max_budget !== undefined ? team?.max_budget : 'unlimited'}`}
rules={[
{
validator: async (_, value) => {
if (value && team && team.max_budget !== null && value > team.max_budget) {
throw new Error(`Budget cannot exceed team max budget: $${team.max_budget}`);
}
},
},
]}
>
<InputNumber step={0.01} precision={2} width={200} />
</Form.Item>
<Form.Item
className="mt-8"
label="Reset Budget"
name="budget_duration"
help={`Team Reset Budget: ${team?.budget_duration !== null && team?.budget_duration !== undefined ? team?.budget_duration : 'None'}`}
>
<Select defaultValue={null} placeholder="n/a">
<Select.Option value="24h">daily</Select.Option>
<Select.Option value="30d">monthly</Select.Option>
</Select>
</Form.Item>
<Form.Item
className="mt-8"
label="Tokens per minute Limit (TPM)"
name="tpm_limit"
help={`TPM cannot exceed team TPM limit: ${team?.tpm_limit !== null && team?.tpm_limit !== undefined ? team?.tpm_limit : 'unlimited'}`}
rules={[
{
validator: async (_, value) => {
if (value && team && team.tpm_limit !== null && value > team.tpm_limit) {
throw new Error(`TPM limit cannot exceed team TPM limit: ${team.tpm_limit}`);
}
},
},
]}
>
<InputNumber step={1} width={400} />
</Form.Item>
<Form.Item
className="mt-8"
label="Requests per minute Limit (RPM)"
name="rpm_limit"
help={`RPM cannot exceed team RPM limit: ${team?.rpm_limit !== null && team?.rpm_limit !== undefined ? team?.rpm_limit : 'unlimited'}`}
rules={[
{
validator: async (_, value) => {
if (value && team && team.rpm_limit !== null && value > team.rpm_limit) {
throw new Error(`RPM limit cannot exceed team RPM limit: ${team.rpm_limit}`);
}
},
},
]}
>
<InputNumber step={1} width={400} />
</Form.Item>
<Form.Item label="Expire Key (eg: 30s, 30h, 30d)" name="duration" className="mt-8">
<Input />
</Form.Item>
<Form.Item label="Metadata" name="metadata">
<Input.TextArea rows={4} placeholder="Enter metadata as JSON" />
</Form.Item>
</AccordionBody>
</Accordion>
</>
)}
<div style={{ textAlign: "right", marginTop: "10px" }}>
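The max_budget, tpm_limit, and rpm_limit fields above all use the same "value must not exceed the team limit" validator, repeated inline per field. A hedged sketch of a reusable rule factory for that pattern follows; it is not part of the commit, which keeps the three inline validators:

```typescript
// Sketch of a reusable antd Form rule for the three team-limit checks above.
// The inline validators in this diff are equivalent, just repeated per field.
const teamLimitRule = (
  team: any,
  field: "max_budget" | "tpm_limit" | "rpm_limit",
  label: string
) => ({
  validator: async (_: any, value: number) => {
    const limit = team?.[field];
    if (value != null && limit != null && value > limit) {
      throw new Error(`${label} cannot exceed team limit: ${limit}`);
    }
  },
});

// Usage: rules={[teamLimitRule(team, "tpm_limit", "TPM limit")]}
```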

View file

@@ -4,6 +4,7 @@ import { Select, SelectItem, Text, Title } from "@tremor/react";
interface DashboardTeamProps {
teams: Object[] | null;
setSelectedTeam: React.Dispatch<React.SetStateAction<any | null>>;
userRole: string | null;
}
type TeamInterface = {
@@ -15,6 +16,7 @@ type TeamInterface = {
const DashboardTeam: React.FC<DashboardTeamProps> = ({
teams,
setSelectedTeam,
userRole,
}) => {
const defaultTeam: TeamInterface = {
models: [],
@@ -25,19 +27,26 @@ const DashboardTeam: React.FC<DashboardTeamProps> = ({
const [value, setValue] = useState(defaultTeam);
const updatedTeams = teams ? [...teams, defaultTeam] : [defaultTeam];
let updatedTeams;
if (userRole === "App User") {
// Non-Admin SSO users should only see their own team - they should not see "Default Team"
updatedTeams = teams;
} else {
updatedTeams = teams ? [...teams, defaultTeam] : [defaultTeam];
}
if (userRole === 'App User') return null;
return (
<div className="mt-5 mb-5">
<Title>Select Team</Title>
<Text>
If you belong to multiple teams, this setting controls which team is
used by default when creating new API Keys.
If you belong to multiple teams, this setting controls which team is used by default when creating new API Keys.
</Text>
<Text className="mt-3 mb-3">
        <b>Default Team:</b> If no team_id is set for a key, it will be grouped under the Default Team.
</Text>
{updatedTeams && updatedTeams.length > 0 ? (
<Select defaultValue="0">
{updatedTeams.map((team: any, index) => (

View file

@@ -46,8 +46,8 @@ const Sidebar: React.FC<SidebarProps> = ({
);
}
return (
<Layout style={{ minHeight: "100vh", maxWidth: "120px" }}>
<Sider width={120}>
<Layout style={{ minHeight: "100vh", maxWidth: "130px" }}>
<Sider width={130}>
<Menu
mode="inline"
defaultSelectedKeys={defaultSelectedKey ? defaultSelectedKey : ["1"]}
@@ -63,11 +63,23 @@ const Sidebar: React.FC<SidebarProps> = ({
Test Key
</Text>
</Menu.Item>
<Menu.Item key="11" onClick={() => setPage("api_ref")}>
<Text>
API Reference
</Text>
</Menu.Item>
{
userRole == "Admin" ? (
<Menu.Item key="2" onClick={() => setPage("models")}>
<Text>
Models
</Text>
</Menu.Item>
) : null
}
{userRole == "Admin" ? (
<Menu.Item key="6" onClick={() => setPage("teams")}>
<Text>
@@ -75,11 +87,18 @@ const Sidebar: React.FC<SidebarProps> = ({
</Text>
</Menu.Item>
) : null}
{
userRole == "Admin" ? (
<Menu.Item key="4" onClick={() => setPage("usage")}>
<Text>
Usage
</Text>
</Menu.Item>
) : null
}
{userRole == "Admin" ? (
<Menu.Item key="5" onClick={() => setPage("users")}>
<Text>
@@ -87,16 +106,27 @@ const Sidebar: React.FC<SidebarProps> = ({
</Text>
</Menu.Item>
) : null}
{
userRole == "Admin" ? (
<Menu.Item key="8" onClick={() => setPage("settings")}>
<Text>
Integrations
</Text>
</Menu.Item>
) : null
}
{
userRole == "Admin" ? (
<Menu.Item key="9" onClick={() => setPage("general-settings")}>
<Text>
Settings
</Text>
</Menu.Item>
) : null
}
{userRole == "Admin" ? (
<Menu.Item key="7" onClick={() => setPage("admin-panel")}>
<Text>
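The sidebar changes above repeat the same `userRole == "Admin" ? <Menu.Item …/> : null` guard for Models, Teams, Usage, Users, Integrations, Settings, and the Admin Panel. A small hedged sketch of one way to collapse that repetition; the `adminOnly` helper is illustrative and not part of this commit:

```tsx
// Sketch only: collapsing the repeated admin-only guards in leftnav.tsx.
const adminOnly = (userRole: string | null, item: JSX.Element) =>
  userRole == "Admin" ? item : null;

// Usage inside <Menu>:
// {adminOnly(userRole, (
//   <Menu.Item key="2" onClick={() => setPage("models")}>
//     <Text>Models</Text>
//   </Menu.Item>
// ))}
```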

View file

@@ -296,6 +296,9 @@ export const userInfoCall = async (
if (userRole == "App Owner" && userID) {
url = `${url}?user_id=${userID}`;
}
if (userRole == "App User" && userID) {
url = `${url}?user_id=${userID}`;
}
console.log("in userInfoCall viewAll=", viewAll);
if (viewAll && page_size && (page != null) && (page != undefined)) {
url = `${url}?view_all=true&page=${page}&page_size=${page_size}`;
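The new "App User" branch sets the same query parameter as the existing "App Owner" branch, so the two checks could be merged. A sketch of the combined condition, for illustration only; the commit keeps them as separate ifs:

```typescript
// Sketch: equivalent to the two role branches above in userInfoCall.
if ((userRole == "App Owner" || userRole == "App User") && userID) {
  url = `${url}?user_id=${userID}`;
}
```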

View file

@@ -5,6 +5,7 @@ import { Grid, Col, Card, Text, Title } from "@tremor/react";
import CreateKey from "./create_key_button";
import ViewKeyTable from "./view_key_table";
import ViewUserSpend from "./view_user_spend";
import ViewUserTeam from "./view_user_team";
import DashboardTeam from "./dashboard_default_team";
import { useSearchParams, useRouter } from "next/navigation";
import { jwtDecode } from "jwt-decode";
@@ -232,11 +233,19 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
<div className="w-full mx-4">
<Grid numItems={1} className="gap-2 p-8 h-[75vh] w-full mt-2">
<Col numColSpan={1}>
<ViewUserTeam
userID={userID}
userRole={userRole}
selectedTeam={selectedTeam ? selectedTeam : null}
accessToken={accessToken}
/>
<ViewUserSpend
userID={userID}
userRole={userRole}
accessToken={accessToken}
userSpend={teamSpend}
selectedTeam = {selectedTeam ? selectedTeam : null}
/>
<ViewKeyTable
@@ -257,7 +266,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
data={keys}
setData={setKeys}
/>
<DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} />
<DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} userRole={userRole}/>
</Col>
</Grid>
</div>

View file

@@ -2,7 +2,7 @@
import React, { useEffect, useState } from "react";
import { keyDeleteCall, getTotalSpendCall } from "./networking";
import { StatusOnlineIcon, TrashIcon } from "@heroicons/react/outline";
import { DonutChart } from "@tremor/react";
import { Accordion, AccordionHeader, AccordionList, DonutChart } from "@tremor/react";
import {
Badge,
Card,
@@ -16,9 +16,13 @@ import {
Text,
Title,
Icon,
AccordionBody,
List,
ListItem,
} from "@tremor/react";
import { Statistic } from "antd"
import { spendUsersCall } from "./networking";
import { spendUsersCall, modelAvailableCall } from "./networking";
// Define the props type
@@ -32,11 +36,13 @@ interface ViewUserSpendProps {
userRole: string | null;
accessToken: string | null;
userSpend: number | null;
selectedTeam: any | null;
}
const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessToken, userSpend }) => {
const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessToken, userSpend, selectedTeam }) => {
console.log(`userSpend: ${userSpend}`)
let [spend, setSpend] = useState(userSpend !== null ? userSpend : 0.0);
const [maxBudget, setMaxBudget] = useState(0.0);
const [userModels, setUserModels] = useState([]);
useEffect(() => {
const fetchData = async () => {
if (!accessToken || !userID || !userRole) {
@@ -62,9 +68,30 @@ const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessT
}
}
};
const fetchUserModels = async () => {
try {
if (userID === null || userRole === null) {
return;
}
if (accessToken !== null) {
const model_available = await modelAvailableCall(accessToken, userID, userRole);
let available_model_names = model_available["data"].map(
(element: { id: string }) => element.id
);
console.log("available_model_names:", available_model_names);
setUserModels(available_model_names);
}
} catch (error) {
console.error("Error fetching user models:", error);
}
};
fetchUserModels();
fetchData();
}, [userRole, accessToken]);
}, [userRole, accessToken, userID]);
useEffect(() => {
if (userSpend !== null) {
@@ -72,18 +99,50 @@ const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessT
}
}, [userSpend])
// logic to decide what models to display
let modelsToDisplay = [];
if (selectedTeam && selectedTeam.models) {
modelsToDisplay = selectedTeam.models;
}
// check if "all-proxy-models" is in modelsToDisplay
if (modelsToDisplay && modelsToDisplay.includes("all-proxy-models")) {
console.log("user models:", userModels);
modelsToDisplay = userModels;
}
const displayMaxBudget = maxBudget !== null ? `$${maxBudget} limit` : "No limit";
const roundedSpend = spend !== undefined ? spend.toFixed(4) : null;
console.log(`spend in view user spend: ${spend}`)
return (
<>
<p className="text-tremor-default text-tremor-content dark:text-dark-tremor-content">Total Spend </p>
<p className="text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">${roundedSpend}</p>
</>
)
<div className="flex items-center">
<div>
<p className="text-tremor-default text-tremor-content dark:text-dark-tremor-content">
Total Spend{" "}
</p>
<p className="text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">
${roundedSpend}
</p>
</div>
<div className="ml-auto">
<Accordion>
<AccordionHeader>Models</AccordionHeader>
<AccordionBody className="absolute right-0 z-10 bg-white p-2 shadow-lg max-w-xs">
<List>
{modelsToDisplay.map((model: string) => (
<ListItem key={model}>
<Text>{model}</Text>
</ListItem>
))}
</List>
</AccordionBody>
</Accordion>
</div>
</div>
);
}
export default ViewUserSpend;

View file

@@ -0,0 +1,78 @@
"use client";
import React, { useEffect, useState } from "react";
import {
Badge,
Card,
Table,
Metric,
TableBody,
TableCell,
TableHead,
TableHeaderCell,
TableRow,
Text,
Title,
Icon,
Accordion,
AccordionBody,
AccordionHeader,
List,
ListItem,
} from "@tremor/react";
import { Statistic } from "antd"
import { modelAvailableCall } from "./networking";
interface ViewUserTeamProps {
userID: string | null;
userRole: string | null;
selectedTeam: any | null;
accessToken: string | null;
}
const ViewUserTeam: React.FC<ViewUserTeamProps> = ({ userID, userRole, selectedTeam, accessToken}) => {
const [userModels, setUserModels] = useState([]);
useEffect(() => {
const fetchUserModels = async () => {
try {
if (userID === null || userRole === null) {
return;
}
if (accessToken !== null) {
const model_available = await modelAvailableCall(accessToken, userID, userRole);
let available_model_names = model_available["data"].map(
(element: { id: string }) => element.id
);
console.log("available_model_names:", available_model_names);
setUserModels(available_model_names);
}
} catch (error) {
console.error("Error fetching user models:", error);
}
};
fetchUserModels();
}, [accessToken, userID, userRole]);
// logic to decide what models to display
let modelsToDisplay = [];
if (selectedTeam && selectedTeam.models) {
modelsToDisplay = selectedTeam.models;
}
// check if "all-proxy-models" is in modelsToDisplay
if (modelsToDisplay && modelsToDisplay.includes("all-proxy-models")) {
console.log("user models:", userModels);
modelsToDisplay = userModels;
}
return (
<>
<div className="mb-5">
<p className="text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">{selectedTeam?.team_alias}</p>
</div>
</>
)
}
export default ViewUserTeam;
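Both view_user_spend.tsx and the new view_user_team.tsx above inline the same "which models to display" logic: take the selected team's models, and expand the "all-proxy-models" sentinel to the user's available models. A hedged sketch of a shared helper for that logic; it is illustrative only, as the commit keeps the logic inlined in both components:

```typescript
// Sketch of a shared helper for the duplicated model-display logic in
// view_user_spend.tsx and view_user_team.tsx. Not part of this commit.
export function resolveModelsToDisplay(
  selectedTeam: { models?: string[] } | null,
  userModels: string[]
): string[] {
  const teamModels = selectedTeam?.models ?? [];
  // "all-proxy-models" means: show every model the user can access
  if (teamModels.includes("all-proxy-models")) {
    return userModels;
  }
  return teamModels;
}
```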