forked from phoenix/litellm-mirror

Merge branch 'main' into litellm_azure_content_filter_fallbacks

commit 0454c0781a

51 changed files with 1650 additions and 1074 deletions
@@ -152,3 +152,104 @@ litellm_remaining_team_budget_metric{team_alias="QA Prod Bot",team_id="de35b29e-

```

### Dynamic TPM Allocation

Prevent projects from gobbling up too much quota.

Dynamically allocate a model's TPM quota across API keys, based on how many keys are active in that minute.
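
The arithmetic is simple: each key's allowance is the model's `tpm` divided by the number of keys that called the model in the current minute. A minimal sketch of that arithmetic (illustrative variable names only, not the proxy's internal implementation):

```python
# Illustrative arithmetic for the config below: tpm: 60, two keys active this minute.
model_tpm = 60
active_keys_this_minute = 2

available_tpm_per_key = model_tpm // active_keys_this_minute
print(available_tpm_per_key)  # 30 -> with a 30-token mock response, 1 request/key/minute
```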

1. Setup config.yaml

```yaml
model_list:
- model_name: my-fake-model
  litellm_params:
    model: gpt-3.5-turbo
    api_key: my-fake-key
    mock_response: hello-world
    tpm: 60

litellm_settings:
  callbacks: ["dynamic_rate_limiter"]

general_settings:
  master_key: sk-1234 # OR set `LITELLM_MASTER_KEY=".."` in your .env
  database_url: postgres://.. # OR set `DATABASE_URL=".."` in your .env
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml
```

3. Test it!

```python
"""
- Run 2 concurrent teams calling same model
- model has 60 TPM
- Mock response returns 30 total tokens / request
- Each team will only be able to make 1 request per minute
"""
import requests
from openai import OpenAI, RateLimitError


def create_key(api_key: str, base_url: str):
    response = requests.post(
        url="{}/key/generate".format(base_url),
        json={},
        headers={"Authorization": "Bearer {}".format(api_key)},
    )

    _response = response.json()

    return _response["key"]


key_1 = create_key(api_key="sk-1234", base_url="http://0.0.0.0:4000")
key_2 = create_key(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# call proxy with key 1 - works
openai_client_1 = OpenAI(api_key=key_1, base_url="http://0.0.0.0:4000")

response = openai_client_1.chat.completions.with_raw_response.create(
    model="my-fake-model",
    messages=[{"role": "user", "content": "Hello world!"}],
)

print("Headers for call 1 - {}".format(response.headers))
_response = response.parse()
print("Total tokens for call - {}".format(_response.usage.total_tokens))


# call proxy with key 2 - works
openai_client_2 = OpenAI(api_key=key_2, base_url="http://0.0.0.0:4000")

response = openai_client_2.chat.completions.with_raw_response.create(
    model="my-fake-model",
    messages=[{"role": "user", "content": "Hello world!"}],
)

print("Headers for call 2 - {}".format(response.headers))
_response = response.parse()
print("Total tokens for call - {}".format(_response.usage.total_tokens))

# call proxy with key 2 again - fails, its per-minute allocation is exhausted
try:
    openai_client_2.chat.completions.with_raw_response.create(
        model="my-fake-model",
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
    )
    raise Exception("This should have failed!")
except RateLimitError as e:
    print("This was rate limited b/c - {}".format(str(e)))
```

**Expected Response**

```
This was rate limited b/c - Error code: 429 - {'error': {'message': {'error': 'Key=<hashed_token> over available TPM=0. Model TPM=0, Active keys=2'}, 'type': 'None', 'param': 'None', 'code': 429}}
```

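The script above drives the limiter with two plain keys; if you want the split attributed to actual teams (as the docstring's wording suggests), the same flow works with team-scoped keys. A sketch, assuming two teams already exist on the proxy (`team-a` / `team-b` are placeholder IDs):

```python
import requests


def create_team_key(api_key: str, base_url: str, team_id: str) -> str:
    # /key/generate accepts an optional team_id, attributing the key to that team
    response = requests.post(
        url="{}/key/generate".format(base_url),
        json={"team_id": team_id},
        headers={"Authorization": "Bearer {}".format(api_key)},
    )
    return response.json()["key"]


key_team_a = create_team_key("sk-1234", "http://0.0.0.0:4000", team_id="team-a")
key_team_b = create_team_key("sk-1234", "http://0.0.0.0:4000", team_id="team-b")
```
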
@ -37,7 +37,9 @@ input_callback: List[Union[str, Callable]] = []
|
|||
success_callback: List[Union[str, Callable]] = []
|
||||
failure_callback: List[Union[str, Callable]] = []
|
||||
service_callback: List[Union[str, Callable]] = []
|
||||
_custom_logger_compatible_callbacks_literal = Literal["lago", "openmeter", "logfire"]
|
||||
_custom_logger_compatible_callbacks_literal = Literal[
|
||||
"lago", "openmeter", "logfire", "dynamic_rate_limiter"
|
||||
]
|
||||
callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] = []
|
||||
_langfuse_default_tags: Optional[
|
||||
List[
|
||||
|
@ -735,6 +737,7 @@ from .utils import (
|
|||
client,
|
||||
exception_type,
|
||||
get_optional_params,
|
||||
get_response_string,
|
||||
modify_integration,
|
||||
token_counter,
|
||||
create_pretrained_tokenizer,
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
from typing import TYPE_CHECKING, Any, Optional, Union
|
||||
|
||||
import litellm
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from .types.services import ServiceTypes, ServiceLoggerPayload
|
||||
from .integrations.prometheus_services import PrometheusServicesLogger
|
||||
|
||||
from .integrations.custom_logger import CustomLogger
|
||||
from datetime import timedelta
|
||||
from typing import Union, Optional, TYPE_CHECKING, Any
|
||||
from .integrations.prometheus_services import PrometheusServicesLogger
|
||||
from .types.services import ServiceLoggerPayload, ServiceTypes
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from opentelemetry.trace import Span as _Span
|
||||
|
@ -53,8 +54,8 @@ class ServiceLogging(CustomLogger):
|
|||
call_type: str,
|
||||
duration: float,
|
||||
parent_otel_span: Optional[Span] = None,
|
||||
start_time: Optional[datetime] = None,
|
||||
end_time: Optional[datetime] = None,
|
||||
start_time: Optional[Union[datetime, float]] = None,
|
||||
end_time: Optional[Union[datetime, float]] = None,
|
||||
):
|
||||
"""
|
||||
- For counting if the redis, postgres call is successful
|
||||
|
@ -92,8 +93,8 @@ class ServiceLogging(CustomLogger):
|
|||
error: Union[str, Exception],
|
||||
call_type: str,
|
||||
parent_otel_span: Optional[Span] = None,
|
||||
start_time: Optional[datetime] = None,
|
||||
end_time: Optional[datetime] = None,
|
||||
start_time: Optional[Union[datetime, float]] = None,
|
||||
end_time: Optional[Union[float, datetime]] = None,
|
||||
):
|
||||
"""
|
||||
- For counting if the redis, postgres call is unsuccessful
|
||||
|
|
|
@ -7,14 +7,21 @@
|
|||
#
|
||||
# Thank you users! We ❤️ you! - Krrish & Ishaan
|
||||
|
||||
import litellm
|
||||
import time, logging, asyncio
|
||||
import json, traceback, ast, hashlib
|
||||
from typing import Optional, Literal, List, Union, Any, BinaryIO
|
||||
import ast
|
||||
import asyncio
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import traceback
|
||||
from datetime import timedelta
|
||||
from typing import Any, BinaryIO, List, Literal, Optional, Union
|
||||
|
||||
from openai._models import BaseModel as OpenAIObject
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.types.services import ServiceLoggerPayload, ServiceTypes
|
||||
import traceback
|
||||
|
||||
|
||||
def print_verbose(print_statement):
|
||||
|
@@ -78,6 +85,17 @@ class InMemoryCache(BaseCache):
            else:
                self.set_cache(key=cache_key, value=cache_value)

    async def async_set_cache_sadd(self, key, value: List, ttl: Optional[float]):
        """
        Add value to set
        """
        # get the value
        init_value = self.get_cache(key=key) or set()
        for val in value:
            init_value.add(val)
        self.set_cache(key, init_value, ttl=ttl)
        return value

    def get_cache(self, key, **kwargs):
        if key in self.cache_dict:
            if key in self.ttl_dict:
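
The new `async_set_cache_sadd` above gives the in-memory cache set semantics: re-adding a value that is already present within the TTL window is a no-op, which is what makes counting distinct active keys cheap. A small usage sketch (the key format is made up for illustration; it is not the limiter's real key schema):

```python
import asyncio

from litellm.caching import InMemoryCache


async def demo():
    cache = InMemoryCache()
    minute_key = "gpt-3.5-turbo:active-keys:2024-06-22-10-05"  # illustrative key name

    # record two API keys as active for this model in the current minute
    await cache.async_set_cache_sadd(key=minute_key, value=["key_1", "key_2"], ttl=60)
    # re-adding key_1 leaves the set unchanged
    await cache.async_set_cache_sadd(key=minute_key, value=["key_1"], ttl=60)

    print(len(cache.get_cache(key=minute_key)))  # 2 distinct active keys


asyncio.run(demo())
```
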
@ -147,10 +165,12 @@ class RedisCache(BaseCache):
|
|||
namespace: Optional[str] = None,
|
||||
**kwargs,
|
||||
):
|
||||
from ._redis import get_redis_client, get_redis_connection_pool
|
||||
from litellm._service_logger import ServiceLogging
|
||||
import redis
|
||||
|
||||
from litellm._service_logger import ServiceLogging
|
||||
|
||||
from ._redis import get_redis_client, get_redis_connection_pool
|
||||
|
||||
redis_kwargs = {}
|
||||
if host is not None:
|
||||
redis_kwargs["host"] = host
|
||||
|
@ -329,6 +349,7 @@ class RedisCache(BaseCache):
|
|||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
|
||||
call_type="async_set_cache",
|
||||
)
|
||||
)
|
||||
# NON blocking - notify users Redis is throwing an exception
|
||||
|
@ -448,6 +469,80 @@ class RedisCache(BaseCache):
|
|||
cache_value,
|
||||
)
|
||||
|
||||
async def async_set_cache_sadd(
|
||||
self, key, value: List, ttl: Optional[float], **kwargs
|
||||
):
|
||||
start_time = time.time()
|
||||
try:
|
||||
_redis_client = self.init_async_client()
|
||||
except Exception as e:
|
||||
end_time = time.time()
|
||||
_duration = end_time - start_time
|
||||
asyncio.create_task(
|
||||
self.service_logger_obj.async_service_failure_hook(
|
||||
service=ServiceTypes.REDIS,
|
||||
duration=_duration,
|
||||
error=e,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
|
||||
call_type="async_set_cache_sadd",
|
||||
)
|
||||
)
|
||||
# NON blocking - notify users Redis is throwing an exception
|
||||
verbose_logger.error(
|
||||
"LiteLLM Redis Caching: async set() - Got exception from REDIS %s, Writing value=%s",
|
||||
str(e),
|
||||
value,
|
||||
)
|
||||
raise e
|
||||
|
||||
key = self.check_and_fix_namespace(key=key)
|
||||
async with _redis_client as redis_client:
|
||||
print_verbose(
|
||||
f"Set ASYNC Redis Cache: key: {key}\nValue {value}\nttl={ttl}"
|
||||
)
|
||||
try:
|
||||
await redis_client.sadd(key, *value)
|
||||
if ttl is not None:
|
||||
_td = timedelta(seconds=ttl)
|
||||
await redis_client.expire(key, _td)
|
||||
print_verbose(
|
||||
f"Successfully Set ASYNC Redis Cache SADD: key: {key}\nValue {value}\nttl={ttl}"
|
||||
)
|
||||
end_time = time.time()
|
||||
_duration = end_time - start_time
|
||||
asyncio.create_task(
|
||||
self.service_logger_obj.async_service_success_hook(
|
||||
service=ServiceTypes.REDIS,
|
||||
duration=_duration,
|
||||
call_type="async_set_cache_sadd",
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
end_time = time.time()
|
||||
_duration = end_time - start_time
|
||||
asyncio.create_task(
|
||||
self.service_logger_obj.async_service_failure_hook(
|
||||
service=ServiceTypes.REDIS,
|
||||
duration=_duration,
|
||||
error=e,
|
||||
call_type="async_set_cache_sadd",
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
|
||||
)
|
||||
)
|
||||
# NON blocking - notify users Redis is throwing an exception
|
||||
verbose_logger.error(
|
||||
"LiteLLM Redis Caching: async set_cache_sadd() - Got exception from REDIS %s, Writing value=%s",
|
||||
str(e),
|
||||
value,
|
||||
)
|
||||
|
||||
async def batch_cache_write(self, key, value, **kwargs):
|
||||
print_verbose(
|
||||
f"in batch cache writing for redis buffer size={len(self.redis_batch_writing_buffer)}",
|
||||
|
@ -886,11 +981,10 @@ class RedisSemanticCache(BaseCache):
|
|||
|
||||
def get_cache(self, key, **kwargs):
|
||||
print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}")
|
||||
from redisvl.query import VectorQuery
|
||||
import numpy as np
|
||||
from redisvl.query import VectorQuery
|
||||
|
||||
# query
|
||||
|
||||
# get the messages
|
||||
messages = kwargs["messages"]
|
||||
prompt = "".join(message["content"] for message in messages)
|
||||
|
@ -943,7 +1037,8 @@ class RedisSemanticCache(BaseCache):
|
|||
|
||||
async def async_set_cache(self, key, value, **kwargs):
|
||||
import numpy as np
|
||||
from litellm.proxy.proxy_server import llm_router, llm_model_list
|
||||
|
||||
from litellm.proxy.proxy_server import llm_model_list, llm_router
|
||||
|
||||
try:
|
||||
await self.index.acreate(overwrite=False) # don't overwrite existing index
|
||||
|
@ -998,12 +1093,12 @@ class RedisSemanticCache(BaseCache):
|
|||
|
||||
async def async_get_cache(self, key, **kwargs):
|
||||
print_verbose(f"async redis semantic-cache get_cache, kwargs: {kwargs}")
|
||||
from redisvl.query import VectorQuery
|
||||
import numpy as np
|
||||
from litellm.proxy.proxy_server import llm_router, llm_model_list
|
||||
from redisvl.query import VectorQuery
|
||||
|
||||
from litellm.proxy.proxy_server import llm_model_list, llm_router
|
||||
|
||||
# query
|
||||
|
||||
# get the messages
|
||||
messages = kwargs["messages"]
|
||||
prompt = "".join(message["content"] for message in messages)
|
||||
|
@ -1161,7 +1256,8 @@ class S3Cache(BaseCache):
|
|||
self.set_cache(key=key, value=value, **kwargs)
|
||||
|
||||
def get_cache(self, key, **kwargs):
|
||||
import boto3, botocore
|
||||
import boto3
|
||||
import botocore
|
||||
|
||||
try:
|
||||
key = self.key_prefix + key
|
||||
|
@ -1471,7 +1567,7 @@ class DualCache(BaseCache):
|
|||
key, value, **kwargs
|
||||
)
|
||||
|
||||
if self.redis_cache is not None and local_only == False:
|
||||
if self.redis_cache is not None and local_only is False:
|
||||
result = await self.redis_cache.async_increment(key, value, **kwargs)
|
||||
|
||||
return result
|
||||
|
@@ -1480,6 +1576,38 @@
            verbose_logger.debug(traceback.format_exc())
            raise e

    async def async_set_cache_sadd(
        self, key, value: List, local_only: bool = False, **kwargs
    ) -> None:
        """
        Add value to a set

        Key - the key in cache

        Value - str - the value you want to add to the set

        Returns - None
        """
        try:
            if self.in_memory_cache is not None:
                _ = await self.in_memory_cache.async_set_cache_sadd(
                    key, value, ttl=kwargs.get("ttl", None)
                )

            if self.redis_cache is not None and local_only is False:
                _ = await self.redis_cache.async_set_cache_sadd(
                    key, value, ttl=kwargs.get("ttl", None)
                )

            return None
        except Exception as e:
            verbose_logger.error(
                "LiteLLM Cache: Exception async set_cache_sadd: {}\n{}".format(
                    str(e), traceback.format_exc()
                )
            )
            raise e

    def flush_cache(self):
        if self.in_memory_cache is not None:
            self.in_memory_cache.flush_cache()
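
`DualCache.async_set_cache_sadd` layers the same set-add onto both tiers: it always writes the in-memory set, and mirrors it to Redis unless `local_only=True`, so a multi-instance proxy still converges on one view of which callers are active. A sketch under the assumption of an in-memory-only `DualCache` (pass a `RedisCache` for the shared case):

```python
import asyncio

from litellm.caching import DualCache


async def demo():
    cache = DualCache()  # no redis_cache given -> in-memory tier only
    sample_key = "demo:active-projects"  # illustrative key, not the limiter's real schema

    await cache.async_set_cache_sadd(key=sample_key, value=["project-a"], ttl=60)
    await cache.async_set_cache_sadd(key=sample_key, value=["project-b"], ttl=60)

    active = await cache.async_get_cache(key=sample_key) or set()
    print(len(active))  # 2 distinct active projects in this window


asyncio.run(demo())
```
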
@ -105,8 +105,8 @@ class OpenTelemetry(CustomLogger):
|
|||
self,
|
||||
payload: ServiceLoggerPayload,
|
||||
parent_otel_span: Optional[Span] = None,
|
||||
start_time: Optional[datetime] = None,
|
||||
end_time: Optional[datetime] = None,
|
||||
start_time: Optional[Union[datetime, float]] = None,
|
||||
end_time: Optional[Union[datetime, float]] = None,
|
||||
):
|
||||
from datetime import datetime
|
||||
|
||||
|
@ -144,8 +144,8 @@ class OpenTelemetry(CustomLogger):
|
|||
self,
|
||||
payload: ServiceLoggerPayload,
|
||||
parent_otel_span: Optional[Span] = None,
|
||||
start_time: Optional[datetime] = None,
|
||||
end_time: Optional[datetime] = None,
|
||||
start_time: Optional[Union[datetime, float]] = None,
|
||||
end_time: Optional[Union[float, datetime]] = None,
|
||||
):
|
||||
from datetime import datetime
|
||||
|
||||
|
|
|
@ -19,7 +19,8 @@ from litellm import (
|
|||
turn_off_message_logging,
|
||||
verbose_logger,
|
||||
)
|
||||
from litellm.caching import InMemoryCache, S3Cache
|
||||
|
||||
from litellm.caching import InMemoryCache, S3Cache, DualCache
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.litellm_core_utils.redact_messages import (
|
||||
redact_message_input_output_from_logging,
|
||||
|
@ -1899,7 +1900,11 @@ def set_callbacks(callback_list, function_id=None):
|
|||
|
||||
def _init_custom_logger_compatible_class(
|
||||
logging_integration: litellm._custom_logger_compatible_callbacks_literal,
|
||||
) -> Callable:
|
||||
internal_usage_cache: Optional[DualCache],
|
||||
llm_router: Optional[
|
||||
Any
|
||||
], # expect litellm.Router, but typing errors due to circular import
|
||||
) -> CustomLogger:
|
||||
if logging_integration == "lago":
|
||||
for callback in _in_memory_loggers:
|
||||
if isinstance(callback, LagoLogger):
|
||||
|
@ -1935,3 +1940,58 @@ def _init_custom_logger_compatible_class(
|
|||
_otel_logger = OpenTelemetry(config=otel_config)
|
||||
_in_memory_loggers.append(_otel_logger)
|
||||
return _otel_logger # type: ignore
|
||||
elif logging_integration == "dynamic_rate_limiter":
|
||||
from litellm.proxy.hooks.dynamic_rate_limiter import (
|
||||
_PROXY_DynamicRateLimitHandler,
|
||||
)
|
||||
|
||||
for callback in _in_memory_loggers:
|
||||
if isinstance(callback, _PROXY_DynamicRateLimitHandler):
|
||||
return callback # type: ignore
|
||||
|
||||
if internal_usage_cache is None:
|
||||
raise Exception(
|
||||
"Internal Error: Cache cannot be empty - internal_usage_cache={}".format(
|
||||
internal_usage_cache
|
||||
)
|
||||
)
|
||||
|
||||
dynamic_rate_limiter_obj = _PROXY_DynamicRateLimitHandler(
|
||||
internal_usage_cache=internal_usage_cache
|
||||
)
|
||||
|
||||
if llm_router is not None and isinstance(llm_router, litellm.Router):
|
||||
dynamic_rate_limiter_obj.update_variables(llm_router=llm_router)
|
||||
_in_memory_loggers.append(dynamic_rate_limiter_obj)
|
||||
return dynamic_rate_limiter_obj # type: ignore
|
||||
|
||||
|
||||
def get_custom_logger_compatible_class(
|
||||
logging_integration: litellm._custom_logger_compatible_callbacks_literal,
|
||||
) -> Optional[CustomLogger]:
|
||||
if logging_integration == "lago":
|
||||
for callback in _in_memory_loggers:
|
||||
if isinstance(callback, LagoLogger):
|
||||
return callback
|
||||
elif logging_integration == "openmeter":
|
||||
for callback in _in_memory_loggers:
|
||||
if isinstance(callback, OpenMeterLogger):
|
||||
return callback
|
||||
elif logging_integration == "logfire":
|
||||
if "LOGFIRE_TOKEN" not in os.environ:
|
||||
raise ValueError("LOGFIRE_TOKEN not found in environment variables")
|
||||
from litellm.integrations.opentelemetry import OpenTelemetry
|
||||
|
||||
for callback in _in_memory_loggers:
|
||||
if isinstance(callback, OpenTelemetry):
|
||||
return callback # type: ignore
|
||||
|
||||
elif logging_integration == "dynamic_rate_limiter":
|
||||
from litellm.proxy.hooks.dynamic_rate_limiter import (
|
||||
_PROXY_DynamicRateLimitHandler,
|
||||
)
|
||||
|
||||
for callback in _in_memory_loggers:
|
||||
if isinstance(callback, _PROXY_DynamicRateLimitHandler):
|
||||
return callback # type: ignore
|
||||
return None
|
||||
|
|
|
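
With this change `_init_custom_logger_compatible_class` returns a typed `CustomLogger` and needs the proxy's usage cache (and ideally its router) to build the dynamic rate limiter. A sketch of the call as proxy startup code might make it; note this is an internal, underscore-prefixed helper, and the import path is assumed from the file shown in this diff:

```python
from litellm.caching import DualCache
from litellm.litellm_core_utils.litellm_logging import (  # module path assumed from this diff
    _init_custom_logger_compatible_class,
)

internal_usage_cache = DualCache()

# Returns a _PROXY_DynamicRateLimitHandler (or reuses the one already registered
# in _in_memory_loggers); raises if internal_usage_cache is None.
handler = _init_custom_logger_compatible_class(
    "dynamic_rate_limiter",
    internal_usage_cache=internal_usage_cache,
    llm_router=None,  # pass the proxy's litellm.Router here when it exists
)
```
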
@ -1,63 +1,64 @@
|
|||
# What is this?
|
||||
## Initial implementation of calling bedrock via httpx client (allows for async calls).
|
||||
## V1 - covers cohere + anthropic claude-3 support
|
||||
from functools import partial
|
||||
import os, types
|
||||
import copy
|
||||
import json
|
||||
from enum import Enum
|
||||
import requests, copy # type: ignore
|
||||
import os
|
||||
import time
|
||||
import types
|
||||
import urllib.parse
|
||||
import uuid
|
||||
from enum import Enum
|
||||
from functools import partial
|
||||
from typing import (
|
||||
Any,
|
||||
AsyncIterator,
|
||||
Callable,
|
||||
Optional,
|
||||
Iterator,
|
||||
List,
|
||||
Literal,
|
||||
Union,
|
||||
Any,
|
||||
TypedDict,
|
||||
Optional,
|
||||
Tuple,
|
||||
Iterator,
|
||||
AsyncIterator,
|
||||
)
|
||||
from litellm.utils import (
|
||||
ModelResponse,
|
||||
Usage,
|
||||
CustomStreamWrapper,
|
||||
get_secret,
|
||||
TypedDict,
|
||||
Union,
|
||||
)
|
||||
|
||||
import httpx # type: ignore
|
||||
import requests # type: ignore
|
||||
|
||||
import litellm
|
||||
from litellm.caching import DualCache
|
||||
from litellm.litellm_core_utils.core_helpers import map_finish_reason
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging
|
||||
from litellm.types.utils import Message, Choices
|
||||
import litellm, uuid
|
||||
from .prompt_templates.factory import (
|
||||
prompt_factory,
|
||||
custom_prompt,
|
||||
cohere_message_pt,
|
||||
construct_tool_use_system_prompt,
|
||||
extract_between_tags,
|
||||
parse_xml_params,
|
||||
contains_tag,
|
||||
_bedrock_converse_messages_pt,
|
||||
_bedrock_tools_pt,
|
||||
)
|
||||
from litellm.llms.custom_httpx.http_handler import (
|
||||
AsyncHTTPHandler,
|
||||
HTTPHandler,
|
||||
_get_async_httpx_client,
|
||||
_get_httpx_client,
|
||||
)
|
||||
from .base import BaseLLM
|
||||
import httpx # type: ignore
|
||||
from .bedrock import BedrockError, convert_messages_to_prompt, ModelResponseIterator
|
||||
from litellm.types.llms.bedrock import *
|
||||
import urllib.parse
|
||||
from litellm.types.llms.openai import (
|
||||
ChatCompletionDeltaChunk,
|
||||
ChatCompletionResponseMessage,
|
||||
ChatCompletionToolCallChunk,
|
||||
ChatCompletionToolCallFunctionChunk,
|
||||
ChatCompletionDeltaChunk,
|
||||
)
|
||||
from litellm.caching import DualCache
|
||||
from litellm.types.utils import Choices, Message
|
||||
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage, get_secret
|
||||
|
||||
from .base import BaseLLM
|
||||
from .bedrock import BedrockError, ModelResponseIterator, convert_messages_to_prompt
|
||||
from .prompt_templates.factory import (
|
||||
_bedrock_converse_messages_pt,
|
||||
_bedrock_tools_pt,
|
||||
cohere_message_pt,
|
||||
construct_tool_use_system_prompt,
|
||||
contains_tag,
|
||||
custom_prompt,
|
||||
extract_between_tags,
|
||||
parse_xml_params,
|
||||
prompt_factory,
|
||||
)
|
||||
|
||||
iam_cache = DualCache()
|
||||
|
||||
|
@ -171,6 +172,7 @@ async def make_call(
|
|||
messages: list,
|
||||
logging_obj,
|
||||
):
|
||||
try:
|
||||
if client is None:
|
||||
client = _get_async_httpx_client() # Create a new client if none provided
|
||||
|
||||
|
@ -191,6 +193,13 @@ async def make_call(
|
|||
)
|
||||
|
||||
return completion_stream
|
||||
except httpx.HTTPStatusError as err:
|
||||
error_code = err.response.status_code
|
||||
raise BedrockError(status_code=error_code, message=str(err))
|
||||
except httpx.TimeoutException as e:
|
||||
raise BedrockError(status_code=408, message="Timeout error occurred.")
|
||||
except Exception as e:
|
||||
raise BedrockError(status_code=500, message=str(e))
|
||||
|
||||
|
||||
def make_sync_call(
|
||||
|
@ -704,7 +713,6 @@ class BedrockLLM(BaseLLM):
|
|||
) -> Union[ModelResponse, CustomStreamWrapper]:
|
||||
try:
|
||||
import boto3
|
||||
|
||||
from botocore.auth import SigV4Auth
|
||||
from botocore.awsrequest import AWSRequest
|
||||
from botocore.credentials import Credentials
|
||||
|
@ -1650,7 +1658,6 @@ class BedrockConverseLLM(BaseLLM):
|
|||
):
|
||||
try:
|
||||
import boto3
|
||||
|
||||
from botocore.auth import SigV4Auth
|
||||
from botocore.awsrequest import AWSRequest
|
||||
from botocore.credentials import Credentials
|
||||
|
@ -1904,8 +1911,8 @@ class BedrockConverseLLM(BaseLLM):
|
|||
|
||||
|
||||
def get_response_stream_shape():
|
||||
from botocore.model import ServiceModel
|
||||
from botocore.loaders import Loader
|
||||
from botocore.model import ServiceModel
|
||||
|
||||
loader = Loader()
|
||||
bedrock_service_dict = loader.load_service_model("bedrock-runtime", "service-2")
|
||||
|
|
|
@ -1218,6 +1218,7 @@ class ModelResponseIterator:
|
|||
def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
|
||||
try:
|
||||
processed_chunk = GenerateContentResponseBody(**chunk) # type: ignore
|
||||
|
||||
text = ""
|
||||
tool_use: Optional[ChatCompletionToolCallChunk] = None
|
||||
is_finished = False
|
||||
|
@ -1236,7 +1237,8 @@ class ModelResponseIterator:
|
|||
finish_reason = map_finish_reason(
|
||||
finish_reason=gemini_chunk["finishReason"]
|
||||
)
|
||||
is_finished = True
|
||||
## DO NOT SET 'finish_reason' = True
|
||||
## GEMINI SETS FINISHREASON ON EVERY CHUNK!
|
||||
|
||||
if "usageMetadata" in processed_chunk:
|
||||
usage = ChatCompletionUsageBlock(
|
||||
|
@ -1250,7 +1252,7 @@ class ModelResponseIterator:
|
|||
returned_chunk = GenericStreamingChunk(
|
||||
text=text,
|
||||
tool_use=tool_use,
|
||||
is_finished=is_finished,
|
||||
is_finished=False,
|
||||
finish_reason=finish_reason,
|
||||
usage=usage,
|
||||
index=0,
|
||||
|
@ -1268,9 +1270,8 @@ class ModelResponseIterator:
|
|||
chunk = self.response_iterator.__next__()
|
||||
self.coro.send(chunk)
|
||||
if self.events:
|
||||
event = self.events[0]
|
||||
event = self.events.pop(0)
|
||||
json_chunk = event
|
||||
self.events.clear()
|
||||
return self.chunk_parser(chunk=json_chunk)
|
||||
return GenericStreamingChunk(
|
||||
text="",
|
||||
|
@ -1281,6 +1282,9 @@ class ModelResponseIterator:
|
|||
tool_use=None,
|
||||
)
|
||||
except StopIteration:
|
||||
if self.events: # flush the events
|
||||
event = self.events.pop(0) # Remove the first event
|
||||
return self.chunk_parser(chunk=event)
|
||||
raise StopIteration
|
||||
except ValueError as e:
|
||||
raise RuntimeError(f"Error parsing chunk: {e}")
|
||||
|
@ -1295,9 +1299,8 @@ class ModelResponseIterator:
|
|||
chunk = await self.async_response_iterator.__anext__()
|
||||
self.coro.send(chunk)
|
||||
if self.events:
|
||||
event = self.events[0]
|
||||
event = self.events.pop(0)
|
||||
json_chunk = event
|
||||
self.events.clear()
|
||||
return self.chunk_parser(chunk=json_chunk)
|
||||
return GenericStreamingChunk(
|
||||
text="",
|
||||
|
@ -1308,6 +1311,9 @@ class ModelResponseIterator:
|
|||
tool_use=None,
|
||||
)
|
||||
except StopAsyncIteration:
|
||||
if self.events: # flush the events
|
||||
event = self.events.pop(0) # Remove the first event
|
||||
return self.chunk_parser(chunk=event)
|
||||
raise StopAsyncIteration
|
||||
except ValueError as e:
|
||||
raise RuntimeError(f"Error parsing chunk: {e}")
|
||||
|
|
|
@ -428,7 +428,7 @@ def mock_completion(
|
|||
model: str,
|
||||
messages: List,
|
||||
stream: Optional[bool] = False,
|
||||
mock_response: Union[str, Exception] = "This is a mock request",
|
||||
mock_response: Union[str, Exception, dict] = "This is a mock request",
|
||||
mock_tool_calls: Optional[List] = None,
|
||||
logging=None,
|
||||
custom_llm_provider=None,
|
||||
|
|
|
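
Widening `mock_response` to accept a `dict` means callers are no longer limited to plain-string mocks. The string form is unchanged and looks like the sketch below; the dict form's accepted shape is whatever `mock_completion` maps onto a `ModelResponse`, so it is not shown here:

```python
import litellm

# mock_response short-circuits the provider call and returns a canned reply
resp = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    mock_response="hello-world",
)
print(resp.choices[0].message.content)  # "hello-world"
```
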
@ -1 +0,0 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{61994:function(e,s,l){Promise.resolve().then(l.bind(l,667))},667:function(e,s,l){"use strict";l.r(s),l.d(s,{default:function(){return _}});var t=l(3827),a=l(64090),r=l(47907),n=l(16450),i=l(18190),o=l(13810),u=l(10384),c=l(46453),d=l(71801),m=l(52273),h=l(42440),x=l(30953),j=l(777),p=l(37963),f=l(60620),g=l(1861);function _(){let[e]=f.Z.useForm(),s=(0,r.useSearchParams)();s.get("token");let l=s.get("id"),[_,Z]=(0,a.useState)(null),[w,b]=(0,a.useState)(""),[N,S]=(0,a.useState)(""),[k,y]=(0,a.useState)(null),[v,E]=(0,a.useState)(""),[F,I]=(0,a.useState)("");return(0,a.useEffect)(()=>{l&&(0,j.W_)(l).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let l=e.token,t=(0,p.o)(l);I(l),console.log("decoded:",t),Z(t.key),console.log("decoded user email:",t.user_email),S(t.user_email),y(t.user_id)})},[l]),(0,t.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,t.jsxs)(o.Z,{children:[(0,t.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,t.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,t.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,t.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,t.jsxs)(c.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,t.jsx)(u.Z,{children:"SSO is under the Enterprise Tirer."}),(0,t.jsx)(u.Z,{children:(0,t.jsx)(n.Z,{variant:"primary",className:"mb-2",children:(0,t.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,t.jsxs)(f.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",F,"formValues:",e),_&&F&&(e.user_email=N,k&&l&&(0,j.m_)(_,l,k,e.password).then(e=>{var s;let l="/ui/";console.log("redirecting to:",l+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+F),window.location.href=l}))},children:[(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(f.Z.Item,{label:"Email Address",name:"user_email",children:(0,t.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,t.jsx)(f.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,t.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,t.jsx)("div",{className:"mt-10",children:(0,t.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,294,684,777,971,69,744],function(){return e(e.s=61994)}),_N_E=e.O()}]);
|
|
@ -0,0 +1 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{61994:function(e,s,t){Promise.resolve().then(t.bind(t,667))},667:function(e,s,t){"use strict";t.r(s),t.d(s,{default:function(){return _}});var l=t(3827),n=t(64090),a=t(47907),r=t(16450),i=t(18190),o=t(13810),c=t(10384),u=t(46453),d=t(71801),m=t(52273),h=t(42440),x=t(30953),p=t(777),f=t(37963),j=t(60620),g=t(1861);function _(){let[e]=j.Z.useForm(),s=(0,a.useSearchParams)();!function(e){console.log("COOKIES",document.cookie);let s=document.cookie.split("; ").find(s=>s.startsWith(e+"="));s&&s.split("=")[1]}("token");let t=s.get("id"),[_,Z]=(0,n.useState)(null),[k,w]=(0,n.useState)(""),[S,b]=(0,n.useState)(""),[N,y]=(0,n.useState)(null),[v,E]=(0,n.useState)(""),[I,O]=(0,n.useState)("");return(0,n.useEffect)(()=>{t&&(0,p.W_)(t).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let t=e.token,l=(0,f.o)(t);O(t),console.log("decoded:",l),Z(l.key),console.log("decoded user email:",l.user_email),b(l.user_email),y(l.user_id)})},[t]),(0,l.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,l.jsxs)(o.Z,{children:[(0,l.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,l.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,l.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,l.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,l.jsxs)(u.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,l.jsx)(c.Z,{children:"SSO is under the Enterprise Tirer."}),(0,l.jsx)(c.Z,{children:(0,l.jsx)(r.Z,{variant:"primary",className:"mb-2",children:(0,l.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,l.jsxs)(j.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",I,"formValues:",e),_&&I&&(e.user_email=S,N&&t&&(0,p.m_)(_,t,N,e.password).then(e=>{var s;let t="/ui/";console.log("redirecting to:",t+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+I),window.location.href=t}))},children:[(0,l.jsxs)(l.Fragment,{children:[(0,l.jsx)(j.Z.Item,{label:"Email Address",name:"user_email",children:(0,l.jsx)(m.Z,{type:"email",disabled:!0,value:S,defaultValue:S,className:"max-w-md"})}),(0,l.jsx)(j.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,l.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,l.jsx)("div",{className:"mt-10",children:(0,l.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,294,684,777,971,69,744],function(){return e(e.s=61994)}),_N_E=e.O()}]);
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-f76791513e294b30.js\",\"931\",\"static/chunks/app/page-5b9334558218205d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"dDWf4yi4zCe685SxgCnWX\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-f76791513e294b30.js\",\"931\",\"static/chunks/app/page-42b04008af7da690.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"DahySukItzAH9ZoOiMmQB\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[48951,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-5b9334558218205d.js"],""]
|
||||
3:I[48951,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-42b04008af7da690.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["dDWf4yi4zCe685SxgCnWX",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["DahySukItzAH9ZoOiMmQB",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-f76791513e294b30.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["dDWf4yi4zCe685SxgCnWX",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["DahySukItzAH9ZoOiMmQB",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-da04a591bae84617.js"],""]
|
||||
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-fd30ae439831db99.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["dDWf4yi4zCe685SxgCnWX",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["DahySukItzAH9ZoOiMmQB",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -1,14 +1,10 @@

model_list:
  - model_name: my-fake-model
    litellm_params:
      model: gpt-3.5-turbo
      model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
      api_key: my-fake-key
      mock_response: hello-world
  - model_name: gpt-4o
    litellm_params:
      model: azure/gpt-4o
      api_base: https://litellm8397336933.openai.azure.com/
      api_key: 610f806211ab47f2a694493000045858
      aws_bedrock_runtime_endpoint: http://127.0.0.1:8000

litellm_settings:
  content_policy_fallbacks: [{"gpt-4o": ["my-fake-model"]}]
  success_callback: ["langfuse"]
  failure_callback: ["langfuse"]
@ -30,6 +30,7 @@ model_list:

      api_key: os.environ/AZURE_API_KEY
      api_version: 2024-02-15-preview
      model: azure/chatgpt-v-2
      tpm: 100
    model_name: gpt-3.5-turbo
  - litellm_params:
      model: anthropic.claude-3-sonnet-20240229-v1:0

@ -40,6 +41,7 @@ model_list:

      api_version: 2024-02-15-preview
      model: azure/chatgpt-v-2
      drop_params: True
      tpm: 100
    model_name: gpt-3.5-turbo
  - model_name: tts
    litellm_params:

@ -67,8 +69,7 @@ model_list:

      max_input_tokens: 80920

litellm_settings:
  success_callback: ["langfuse"]
  failure_callback: ["langfuse"]
  callbacks: ["dynamic_rate_limiter"]
  # default_team_settings:
  #   - team_id: proj1
  #     success_callback: ["langfuse"]
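Note on the config hunks above (not part of the diff): with `tpm` set per deployment and `callbacks: ["dynamic_rate_limiter"]` enabled, each successful response should carry the remaining per-project quota in its headers, attached by the new hook added later in this diff. A minimal sketch of checking those headers, assuming a proxy running at `http://0.0.0.0:4000` and a virtual key `sk-1234`:

```python
# Illustrative sketch only (not part of this diff).
# Assumes: proxy at http://0.0.0.0:4000, virtual key sk-1234, config as above.
import requests

resp = requests.post(
    "http://0.0.0.0:4000/chat/completions",
    headers={"Authorization": "Bearer sk-1234"},
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "hi"}],
    },
)

# Header names taken from the dynamic rate limiter's post-call success hook below
for header in (
    "x-ratelimit-remaining-litellm-project-tokens",
    "x-ratelimit-remaining-model-tokens",
    "x-ratelimit-current-active-projects",
):
    print(header, resp.headers.get(header))
```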
@ -188,6 +188,9 @@ class LiteLLMRoutes(enum.Enum):

        # audio transcription
        "/audio/transcriptions",
        "/v1/audio/transcriptions",
        # audio Speech
        "/audio/speech",
        "/v1/audio/speech",
        # moderations
        "/moderations",
        "/v1/moderations",
205
litellm/proxy/hooks/dynamic_rate_limiter.py
Normal file

@ -0,0 +1,205 @@
# What is this?
## Allocates dynamic tpm/rpm quota for a project based on current traffic
## Tracks num active projects per minute

import asyncio
import sys
import traceback
from datetime import datetime
from typing import List, Literal, Optional, Tuple, Union

from fastapi import HTTPException

import litellm
from litellm import ModelResponse, Router
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.router import ModelGroupInfo
from litellm.utils import get_utc_datetime


class DynamicRateLimiterCache:
    """
    Thin wrapper on DualCache for this file.

    Track number of active projects calling a model.
    """

    def __init__(self, cache: DualCache) -> None:
        self.cache = cache
        self.ttl = 60  # 1 min ttl

    async def async_get_cache(self, model: str) -> Optional[int]:
        dt = get_utc_datetime()
        current_minute = dt.strftime("%H-%M")
        key_name = "{}:{}".format(current_minute, model)
        _response = await self.cache.async_get_cache(key=key_name)
        response: Optional[int] = None
        if _response is not None:
            response = len(_response)
        return response

    async def async_set_cache_sadd(self, model: str, value: List):
        """
        Add value to set.

        Parameters:
        - model: str, the name of the model group
        - value: str, the team id

        Returns:
        - None

        Raises:
        - Exception, if unable to connect to cache client (if redis caching enabled)
        """
        try:
            dt = get_utc_datetime()
            current_minute = dt.strftime("%H-%M")

            key_name = "{}:{}".format(current_minute, model)
            await self.cache.async_set_cache_sadd(
                key=key_name, value=value, ttl=self.ttl
            )
        except Exception as e:
            verbose_proxy_logger.error(
                "litellm.proxy.hooks.dynamic_rate_limiter.py::async_set_cache_sadd(): Exception occured - {}\n{}".format(
                    str(e), traceback.format_exc()
                )
            )
            raise e


class _PROXY_DynamicRateLimitHandler(CustomLogger):

    # Class variables or attributes
    def __init__(self, internal_usage_cache: DualCache):
        self.internal_usage_cache = DynamicRateLimiterCache(cache=internal_usage_cache)

    def update_variables(self, llm_router: Router):
        self.llm_router = llm_router

    async def check_available_tpm(
        self, model: str
    ) -> Tuple[Optional[int], Optional[int], Optional[int]]:
        """
        For a given model, get its available tpm

        Returns
        - Tuple[available_tpm, model_tpm, active_projects]
          - available_tpm: int or null - always 0 or positive.
          - remaining_model_tpm: int or null. If available tpm is int, then this will be too.
          - active_projects: int or null
        """
        active_projects = await self.internal_usage_cache.async_get_cache(model=model)
        current_model_tpm: Optional[int] = await self.llm_router.get_model_group_usage(
            model_group=model
        )
        model_group_info: Optional[ModelGroupInfo] = (
            self.llm_router.get_model_group_info(model_group=model)
        )
        total_model_tpm: Optional[int] = None
        if model_group_info is not None and model_group_info.tpm is not None:
            total_model_tpm = model_group_info.tpm

        remaining_model_tpm: Optional[int] = None
        if total_model_tpm is not None and current_model_tpm is not None:
            remaining_model_tpm = total_model_tpm - current_model_tpm
        elif total_model_tpm is not None:
            remaining_model_tpm = total_model_tpm

        available_tpm: Optional[int] = None

        if remaining_model_tpm is not None:
            if active_projects is not None:
                available_tpm = int(remaining_model_tpm / active_projects)
            else:
                available_tpm = remaining_model_tpm

        if available_tpm is not None and available_tpm < 0:
            available_tpm = 0
        return available_tpm, remaining_model_tpm, active_projects

    async def async_pre_call_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        cache: DualCache,
        data: dict,
        call_type: Literal[
            "completion",
            "text_completion",
            "embeddings",
            "image_generation",
            "moderation",
            "audio_transcription",
        ],
    ) -> Optional[
        Union[Exception, str, dict]
    ]:  # raise exception if invalid, return a str for the user to receive - if rejected, or return a modified dictionary for passing into litellm
        """
        - For a model group
        - Check if tpm available
        - Raise RateLimitError if no tpm available
        """
        if "model" in data:
            available_tpm, model_tpm, active_projects = await self.check_available_tpm(
                model=data["model"]
            )
            if available_tpm is not None and available_tpm == 0:
                raise HTTPException(
                    status_code=429,
                    detail={
                        "error": "Key={} over available TPM={}. Model TPM={}, Active keys={}".format(
                            user_api_key_dict.api_key,
                            available_tpm,
                            model_tpm,
                            active_projects,
                        )
                    },
                )
            elif available_tpm is not None:
                ## UPDATE CACHE WITH ACTIVE PROJECT
                asyncio.create_task(
                    self.internal_usage_cache.async_set_cache_sadd(  # this is a set
                        model=data["model"],  # type: ignore
                        value=[user_api_key_dict.token or "default_key"],
                    )
                )
        return None

    async def async_post_call_success_hook(
        self, user_api_key_dict: UserAPIKeyAuth, response
    ):
        try:
            if isinstance(response, ModelResponse):
                model_info = self.llm_router.get_model_info(
                    id=response._hidden_params["model_id"]
                )
                assert (
                    model_info is not None
                ), "Model info for model with id={} is None".format(
                    response._hidden_params["model_id"]
                )
                available_tpm, remaining_model_tpm, active_projects = (
                    await self.check_available_tpm(model=model_info["model_name"])
                )
                response._hidden_params["additional_headers"] = {
                    "x-litellm-model_group": model_info["model_name"],
                    "x-ratelimit-remaining-litellm-project-tokens": available_tpm,
                    "x-ratelimit-remaining-model-tokens": remaining_model_tpm,
                    "x-ratelimit-current-active-projects": active_projects,
                }

                return response
            return await super().async_post_call_success_hook(
                user_api_key_dict, response
            )
        except Exception as e:
            verbose_proxy_logger.error(
                "litellm.proxy.hooks.dynamic_rate_limiter.py::async_post_call_success_hook(): Exception occured - {}\n{}".format(
                    str(e), traceback.format_exc()
                )
            )
            return response
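The quota split implemented by `check_available_tpm` above reduces to a small piece of arithmetic: whatever TPM the model group has left this minute is divided evenly across the projects (keys) seen calling it this minute, floored at zero. A standalone sketch of that calculation (illustrative, not part of the file above):

```python
from typing import Optional


def split_quota(
    total_model_tpm: Optional[int],
    current_model_tpm: Optional[int],
    active_projects: Optional[int],
) -> Optional[int]:
    """Mirror of the allocation logic in check_available_tpm (illustrative)."""
    if total_model_tpm is None:
        return None  # no TPM limit configured -> nothing to allocate
    remaining = total_model_tpm - (current_model_tpm or 0)
    available = remaining // (active_projects or 1)
    return max(0, available)


# model has 60 TPM, 30 tokens already used this minute, 2 active keys
assert split_quota(60, 30, 2) == 15
# no usage yet, 2 active keys -> each key may use 30 tokens this minute
assert split_quota(60, None, 2) == 30
```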
@ -433,6 +433,7 @@ def get_custom_headers(
|
|||
version: Optional[str] = None,
|
||||
model_region: Optional[str] = None,
|
||||
fastest_response_batch_completion: Optional[bool] = None,
|
||||
**kwargs,
|
||||
) -> dict:
|
||||
exclude_values = {"", None}
|
||||
headers = {
|
||||
|
@ -448,6 +449,7 @@ def get_custom_headers(
|
|||
if fastest_response_batch_completion is not None
|
||||
else None
|
||||
),
|
||||
**{k: str(v) for k, v in kwargs.items()},
|
||||
}
|
||||
try:
|
||||
return {
|
||||
|
@ -2524,11 +2526,10 @@ async def async_data_generator(
|
|||
yield f"data: {done_message}\n\n"
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.error(
|
||||
"litellm.proxy.proxy_server.async_data_generator(): Exception occured - {}".format(
|
||||
str(e)
|
||||
"litellm.proxy.proxy_server.async_data_generator(): Exception occured - {}\n{}".format(
|
||||
str(e), traceback.format_exc()
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(traceback.format_exc())
|
||||
await proxy_logging_obj.post_call_failure_hook(
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
original_exception=e,
|
||||
|
@ -2644,7 +2645,9 @@ async def startup_event():
|
|||
redis_cache=redis_usage_cache
|
||||
) # used by parallel request limiter for rate limiting keys across instances
|
||||
|
||||
proxy_logging_obj._init_litellm_callbacks() # INITIALIZE LITELLM CALLBACKS ON SERVER STARTUP <- do this to catch any logging errors on startup, not when calls are being made
|
||||
proxy_logging_obj._init_litellm_callbacks(
|
||||
llm_router=llm_router
|
||||
) # INITIALIZE LITELLM CALLBACKS ON SERVER STARTUP <- do this to catch any logging errors on startup, not when calls are being made
|
||||
|
||||
if "daily_reports" in proxy_logging_obj.slack_alerting_instance.alert_types:
|
||||
asyncio.create_task(
|
||||
|
@ -3061,6 +3064,14 @@ async def chat_completion(
|
|||
headers=custom_headers,
|
||||
)
|
||||
|
||||
### CALL HOOKS ### - modify outgoing data
|
||||
response = await proxy_logging_obj.post_call_success_hook(
|
||||
user_api_key_dict=user_api_key_dict, response=response
|
||||
)
|
||||
|
||||
hidden_params = getattr(response, "_hidden_params", {}) or {}
|
||||
additional_headers: dict = hidden_params.get("additional_headers", {}) or {}
|
||||
|
||||
fastapi_response.headers.update(
|
||||
get_custom_headers(
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
|
@ -3070,14 +3081,10 @@ async def chat_completion(
|
|||
version=version,
|
||||
model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
|
||||
fastest_response_batch_completion=fastest_response_batch_completion,
|
||||
**additional_headers,
|
||||
)
|
||||
)
|
||||
|
||||
### CALL HOOKS ### - modify outgoing data
|
||||
response = await proxy_logging_obj.post_call_success_hook(
|
||||
user_api_key_dict=user_api_key_dict, response=response
|
||||
)
|
||||
|
||||
return response
|
||||
except RejectedRequestError as e:
|
||||
_data = e.request_data
|
||||
|
@ -3116,11 +3123,10 @@ async def chat_completion(
|
|||
except Exception as e:
|
||||
data["litellm_status"] = "fail" # used for alerting
|
||||
verbose_proxy_logger.error(
|
||||
"litellm.proxy.proxy_server.chat_completion(): Exception occured - {}".format(
|
||||
get_error_message_str(e=e)
|
||||
"litellm.proxy.proxy_server.chat_completion(): Exception occured - {}\n{}".format(
|
||||
get_error_message_str(e=e), traceback.format_exc()
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(traceback.format_exc())
|
||||
await proxy_logging_obj.post_call_failure_hook(
|
||||
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
|
||||
)
|
||||
|
@ -7502,6 +7508,12 @@ async def login(request: Request):
|
|||
litellm_dashboard_ui += "/ui/"
|
||||
import jwt
|
||||
|
||||
if litellm_master_key_hash is None:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail={"error": "No master key set, please set LITELLM_MASTER_KEY"},
|
||||
)
|
||||
|
||||
jwt_token = jwt.encode(
|
||||
{
|
||||
"user_id": user_id,
|
||||
|
@ -7511,11 +7523,13 @@ async def login(request: Request):
|
|||
"login_method": "username_password",
|
||||
"premium_user": premium_user,
|
||||
},
|
||||
"secret",
|
||||
litellm_master_key_hash,
|
||||
algorithm="HS256",
|
||||
)
|
||||
litellm_dashboard_ui += "?userID=" + user_id + "&token=" + jwt_token
|
||||
return RedirectResponse(url=litellm_dashboard_ui, status_code=303)
|
||||
litellm_dashboard_ui += "?userID=" + user_id
|
||||
redirect_response = RedirectResponse(url=litellm_dashboard_ui, status_code=303)
|
||||
redirect_response.set_cookie(key="token", value=jwt_token)
|
||||
return redirect_response
|
||||
elif _user_row is not None:
|
||||
"""
|
||||
When sharing invite links
|
||||
|
@ -7564,6 +7578,14 @@ async def login(request: Request):
|
|||
litellm_dashboard_ui += "/ui/"
|
||||
import jwt
|
||||
|
||||
if litellm_master_key_hash is None:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail={
|
||||
"error": "No master key set, please set LITELLM_MASTER_KEY"
|
||||
},
|
||||
)
|
||||
|
||||
jwt_token = jwt.encode(
|
||||
{
|
||||
"user_id": user_id,
|
||||
|
@ -7573,11 +7595,15 @@ async def login(request: Request):
|
|||
"login_method": "username_password",
|
||||
"premium_user": premium_user,
|
||||
},
|
||||
"secret",
|
||||
litellm_master_key_hash,
|
||||
algorithm="HS256",
|
||||
)
|
||||
litellm_dashboard_ui += "?userID=" + user_id + "&token=" + jwt_token
|
||||
return RedirectResponse(url=litellm_dashboard_ui, status_code=303)
|
||||
litellm_dashboard_ui += "?userID=" + user_id
|
||||
redirect_response = RedirectResponse(
|
||||
url=litellm_dashboard_ui, status_code=303
|
||||
)
|
||||
redirect_response.set_cookie(key="token", value=jwt_token)
|
||||
return redirect_response
|
||||
else:
|
||||
raise ProxyException(
|
||||
message=f"Invalid credentials used to access UI. Passed in username: {username}, passed in password: {password}.\nNot valid credentials for {username}",
|
||||
|
@ -7688,6 +7714,12 @@ async def onboarding(invite_link: str):
|
|||
litellm_dashboard_ui += "/ui/onboarding"
|
||||
import jwt
|
||||
|
||||
if litellm_master_key_hash is None:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail={"error": "No master key set, please set LITELLM_MASTER_KEY"},
|
||||
)
|
||||
|
||||
jwt_token = jwt.encode(
|
||||
{
|
||||
"user_id": user_obj.user_id,
|
||||
|
@ -7697,7 +7729,7 @@ async def onboarding(invite_link: str):
|
|||
"login_method": "username_password",
|
||||
"premium_user": premium_user,
|
||||
},
|
||||
"secret",
|
||||
litellm_master_key_hash,
|
||||
algorithm="HS256",
|
||||
)
|
||||
|
||||
|
@ -8108,6 +8140,12 @@ async def auth_callback(request: Request):
|
|||
|
||||
import jwt
|
||||
|
||||
if litellm_master_key_hash is None:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail={"error": "No master key set, please set LITELLM_MASTER_KEY"},
|
||||
)
|
||||
|
||||
jwt_token = jwt.encode(
|
||||
{
|
||||
"user_id": user_id,
|
||||
|
@ -8117,11 +8155,13 @@ async def auth_callback(request: Request):
|
|||
"login_method": "sso",
|
||||
"premium_user": premium_user,
|
||||
},
|
||||
"secret",
|
||||
litellm_master_key_hash,
|
||||
algorithm="HS256",
|
||||
)
|
||||
litellm_dashboard_ui += "?userID=" + user_id + "&token=" + jwt_token
|
||||
return RedirectResponse(url=litellm_dashboard_ui)
|
||||
litellm_dashboard_ui += "?userID=" + user_id
|
||||
redirect_response = RedirectResponse(url=litellm_dashboard_ui, status_code=303)
|
||||
redirect_response.set_cookie(key="token", value=jwt_token)
|
||||
return redirect_response
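Across `/login`, `/onboarding`, and the SSO `auth_callback`, the change above follows one pattern: refuse to mint a UI token when no master key is configured, sign the token with `litellm_master_key_hash` instead of the hard-coded `"secret"`, and hand it to the dashboard via a cookie rather than a `?token=` query parameter. A condensed sketch of that flow (illustrative only; the helper name is hypothetical and the variable names follow the diff, not a public API):

```python
# Illustrative sketch of the shared login/onboarding/SSO token flow above.
from typing import Optional

import jwt  # PyJWT
from fastapi import HTTPException
from fastapi.responses import RedirectResponse


def build_ui_redirect(
    litellm_dashboard_ui: str,
    user_id: str,
    litellm_master_key_hash: Optional[str],
) -> RedirectResponse:
    if litellm_master_key_hash is None:
        raise HTTPException(
            status_code=500,
            detail={"error": "No master key set, please set LITELLM_MASTER_KEY"},
        )
    # token is signed with the master key hash instead of a static "secret"
    token = jwt.encode(
        {"user_id": user_id}, litellm_master_key_hash, algorithm="HS256"
    )
    # token now travels in a cookie instead of the ?token= query parameter
    response = RedirectResponse(
        url=f"{litellm_dashboard_ui}?userID={user_id}", status_code=303
    )
    response.set_cookie(key="token", value=token)
    return response
```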
|
||||
|
||||
|
||||
#### INVITATION MANAGEMENT ####
|
||||
|
|
|
@ -229,31 +229,32 @@ class ProxyLogging:
|
|||
if redis_cache is not None:
|
||||
self.internal_usage_cache.redis_cache = redis_cache
|
||||
|
||||
def _init_litellm_callbacks(self):
|
||||
print_verbose("INITIALIZING LITELLM CALLBACKS!")
|
||||
def _init_litellm_callbacks(self, llm_router: Optional[litellm.Router] = None):
|
||||
self.service_logging_obj = ServiceLogging()
|
||||
litellm.callbacks.append(self.max_parallel_request_limiter)
|
||||
litellm.callbacks.append(self.max_budget_limiter)
|
||||
litellm.callbacks.append(self.cache_control_check)
|
||||
litellm.callbacks.append(self.service_logging_obj)
|
||||
litellm.callbacks.append(self.max_parallel_request_limiter) # type: ignore
|
||||
litellm.callbacks.append(self.max_budget_limiter) # type: ignore
|
||||
litellm.callbacks.append(self.cache_control_check) # type: ignore
|
||||
litellm.callbacks.append(self.service_logging_obj) # type: ignore
|
||||
litellm.success_callback.append(
|
||||
self.slack_alerting_instance.response_taking_too_long_callback
|
||||
)
|
||||
for callback in litellm.callbacks:
|
||||
if isinstance(callback, str):
|
||||
callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(
|
||||
callback
|
||||
callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class( # type: ignore
|
||||
callback,
|
||||
internal_usage_cache=self.internal_usage_cache,
|
||||
llm_router=llm_router,
|
||||
)
|
||||
if callback not in litellm.input_callback:
|
||||
litellm.input_callback.append(callback)
|
||||
litellm.input_callback.append(callback) # type: ignore
|
||||
if callback not in litellm.success_callback:
|
||||
litellm.success_callback.append(callback)
|
||||
litellm.success_callback.append(callback) # type: ignore
|
||||
if callback not in litellm.failure_callback:
|
||||
litellm.failure_callback.append(callback)
|
||||
litellm.failure_callback.append(callback) # type: ignore
|
||||
if callback not in litellm._async_success_callback:
|
||||
litellm._async_success_callback.append(callback)
|
||||
litellm._async_success_callback.append(callback) # type: ignore
|
||||
if callback not in litellm._async_failure_callback:
|
||||
litellm._async_failure_callback.append(callback)
|
||||
litellm._async_failure_callback.append(callback) # type: ignore
|
||||
|
||||
if (
|
||||
len(litellm.input_callback) > 0
|
||||
|
@ -301,10 +302,19 @@ class ProxyLogging:
|
|||
|
||||
try:
|
||||
for callback in litellm.callbacks:
|
||||
if isinstance(callback, CustomLogger) and "async_pre_call_hook" in vars(
|
||||
callback.__class__
|
||||
_callback: Optional[CustomLogger] = None
|
||||
if isinstance(callback, str):
|
||||
_callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
|
||||
callback
|
||||
)
|
||||
else:
|
||||
_callback = callback # type: ignore
|
||||
if (
|
||||
_callback is not None
|
||||
and isinstance(_callback, CustomLogger)
|
||||
and "async_pre_call_hook" in vars(_callback.__class__)
|
||||
):
|
||||
response = await callback.async_pre_call_hook(
|
||||
response = await _callback.async_pre_call_hook(
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
cache=self.call_details["user_api_key_cache"],
|
||||
data=data,
|
||||
|
@ -574,8 +584,15 @@ class ProxyLogging:
|
|||
|
||||
for callback in litellm.callbacks:
|
||||
try:
|
||||
if isinstance(callback, CustomLogger):
|
||||
await callback.async_post_call_failure_hook(
|
||||
_callback: Optional[CustomLogger] = None
|
||||
if isinstance(callback, str):
|
||||
_callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
|
||||
callback
|
||||
)
|
||||
else:
|
||||
_callback = callback # type: ignore
|
||||
if _callback is not None and isinstance(_callback, CustomLogger):
|
||||
await _callback.async_post_call_failure_hook(
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
original_exception=original_exception,
|
||||
)
|
||||
|
@ -596,8 +613,15 @@ class ProxyLogging:
|
|||
"""
|
||||
for callback in litellm.callbacks:
|
||||
try:
|
||||
if isinstance(callback, CustomLogger):
|
||||
await callback.async_post_call_success_hook(
|
||||
_callback: Optional[CustomLogger] = None
|
||||
if isinstance(callback, str):
|
||||
_callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
|
||||
callback
|
||||
)
|
||||
else:
|
||||
_callback = callback # type: ignore
|
||||
if _callback is not None and isinstance(_callback, CustomLogger):
|
||||
await _callback.async_post_call_success_hook(
|
||||
user_api_key_dict=user_api_key_dict, response=response
|
||||
)
|
||||
except Exception as e:
|
||||
|
@ -615,11 +639,22 @@ class ProxyLogging:
|
|||
Covers:
|
||||
1. /chat/completions
|
||||
"""
|
||||
response_str: Optional[str] = None
|
||||
if isinstance(response, ModelResponse):
|
||||
response_str = litellm.get_response_string(response_obj=response)
|
||||
if response_str is not None:
|
||||
for callback in litellm.callbacks:
|
||||
try:
|
||||
if isinstance(callback, CustomLogger):
|
||||
await callback.async_post_call_streaming_hook(
|
||||
user_api_key_dict=user_api_key_dict, response=response
|
||||
_callback: Optional[CustomLogger] = None
|
||||
if isinstance(callback, str):
|
||||
_callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
|
||||
callback
|
||||
)
|
||||
else:
|
||||
_callback = callback # type: ignore
|
||||
if _callback is not None and isinstance(_callback, CustomLogger):
|
||||
await _callback.async_post_call_streaming_hook(
|
||||
user_api_key_dict=user_api_key_dict, response=response_str
|
||||
)
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
|
|
@ -11,6 +11,7 @@ import asyncio
|
|||
import concurrent
|
||||
import copy
|
||||
import datetime as datetime_og
|
||||
import enum
|
||||
import hashlib
|
||||
import inspect
|
||||
import json
|
||||
|
@ -90,6 +91,10 @@ from litellm.utils import (
|
|||
)
|
||||
|
||||
|
||||
class RoutingArgs(enum.Enum):
|
||||
ttl = 60 # 1min (RPM/TPM expire key)
|
||||
|
||||
|
||||
class Router:
|
||||
model_names: List = []
|
||||
cache_responses: Optional[bool] = False
|
||||
|
@ -387,6 +392,11 @@ class Router:
|
|||
routing_strategy=routing_strategy,
|
||||
routing_strategy_args=routing_strategy_args,
|
||||
)
|
||||
## USAGE TRACKING ##
|
||||
if isinstance(litellm._async_success_callback, list):
|
||||
litellm._async_success_callback.append(self.deployment_callback_on_success)
|
||||
else:
|
||||
litellm._async_success_callback.append(self.deployment_callback_on_success)
|
||||
## COOLDOWNS ##
|
||||
if isinstance(litellm.failure_callback, list):
|
||||
litellm.failure_callback.append(self.deployment_callback_on_failure)
|
||||
|
@ -2664,13 +2674,69 @@ class Router:
|
|||
time.sleep(_timeout)
|
||||
|
||||
if type(original_exception) in litellm.LITELLM_EXCEPTION_TYPES:
|
||||
original_exception.max_retries = num_retries
|
||||
original_exception.num_retries = current_attempt
|
||||
setattr(original_exception, "max_retries", num_retries)
|
||||
setattr(original_exception, "num_retries", current_attempt)
|
||||
|
||||
raise original_exception
|
||||
|
||||
### HELPER FUNCTIONS
|
||||
|
||||
async def deployment_callback_on_success(
|
||||
self,
|
||||
kwargs, # kwargs to completion
|
||||
completion_response, # response from completion
|
||||
start_time,
|
||||
end_time, # start/end time
|
||||
):
|
||||
"""
|
||||
Track remaining tpm/rpm quota for model in model_list
|
||||
"""
|
||||
try:
|
||||
"""
|
||||
Update TPM usage on success
|
||||
"""
|
||||
if kwargs["litellm_params"].get("metadata") is None:
|
||||
pass
|
||||
else:
|
||||
model_group = kwargs["litellm_params"]["metadata"].get(
|
||||
"model_group", None
|
||||
)
|
||||
|
||||
id = kwargs["litellm_params"].get("model_info", {}).get("id", None)
|
||||
if model_group is None or id is None:
|
||||
return
|
||||
elif isinstance(id, int):
|
||||
id = str(id)
|
||||
|
||||
total_tokens = completion_response["usage"]["total_tokens"]
|
||||
|
||||
# ------------
|
||||
# Setup values
|
||||
# ------------
|
||||
dt = get_utc_datetime()
|
||||
current_minute = dt.strftime(
|
||||
"%H-%M"
|
||||
) # use the same timezone regardless of system clock
|
||||
|
||||
tpm_key = f"global_router:{id}:tpm:{current_minute}"
|
||||
# ------------
|
||||
# Update usage
|
||||
# ------------
|
||||
# update cache
|
||||
|
||||
## TPM
|
||||
await self.cache.async_increment_cache(
|
||||
key=tpm_key, value=total_tokens, ttl=RoutingArgs.ttl.value
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
verbose_router_logger.error(
|
||||
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}\n{}".format(
|
||||
str(e), traceback.format_exc()
|
||||
)
|
||||
)
|
||||
pass
|
||||
|
||||
def deployment_callback_on_failure(
|
||||
self,
|
||||
kwargs, # kwargs to completion
|
||||
|
@ -3870,10 +3936,39 @@ class Router:
|
|||
|
||||
model_group_info: Optional[ModelGroupInfo] = None
|
||||
|
||||
total_tpm: Optional[int] = None
|
||||
total_rpm: Optional[int] = None
|
||||
|
||||
for model in self.model_list:
|
||||
if "model_name" in model and model["model_name"] == model_group:
|
||||
# model in model group found #
|
||||
litellm_params = LiteLLM_Params(**model["litellm_params"])
|
||||
# get model tpm
|
||||
_deployment_tpm: Optional[int] = None
|
||||
if _deployment_tpm is None:
|
||||
_deployment_tpm = model.get("tpm", None)
|
||||
if _deployment_tpm is None:
|
||||
_deployment_tpm = model.get("litellm_params", {}).get("tpm", None)
|
||||
if _deployment_tpm is None:
|
||||
_deployment_tpm = model.get("model_info", {}).get("tpm", None)
|
||||
|
||||
if _deployment_tpm is not None:
|
||||
if total_tpm is None:
|
||||
total_tpm = 0
|
||||
total_tpm += _deployment_tpm # type: ignore
|
||||
# get model rpm
|
||||
_deployment_rpm: Optional[int] = None
|
||||
if _deployment_rpm is None:
|
||||
_deployment_rpm = model.get("rpm", None)
|
||||
if _deployment_rpm is None:
|
||||
_deployment_rpm = model.get("litellm_params", {}).get("rpm", None)
|
||||
if _deployment_rpm is None:
|
||||
_deployment_rpm = model.get("model_info", {}).get("rpm", None)
|
||||
|
||||
if _deployment_rpm is not None:
|
||||
if total_rpm is None:
|
||||
total_rpm = 0
|
||||
total_rpm += _deployment_rpm # type: ignore
|
||||
# get model info
|
||||
try:
|
||||
model_info = litellm.get_model_info(model=litellm_params.model)
|
||||
|
@ -3987,8 +4082,44 @@ class Router:
|
|||
"supported_openai_params"
|
||||
]
|
||||
|
||||
## UPDATE WITH TOTAL TPM/RPM FOR MODEL GROUP
|
||||
if total_tpm is not None and model_group_info is not None:
|
||||
model_group_info.tpm = total_tpm
|
||||
|
||||
if total_rpm is not None and model_group_info is not None:
|
||||
model_group_info.rpm = total_rpm
|
||||
|
||||
return model_group_info
|
||||
|
||||
async def get_model_group_usage(self, model_group: str) -> Optional[int]:
|
||||
"""
|
||||
Returns remaining tpm quota for model group
|
||||
"""
|
||||
dt = get_utc_datetime()
|
||||
current_minute = dt.strftime(
|
||||
"%H-%M"
|
||||
) # use the same timezone regardless of system clock
|
||||
tpm_keys: List[str] = []
|
||||
for model in self.model_list:
|
||||
if "model_name" in model and model["model_name"] == model_group:
|
||||
tpm_keys.append(
|
||||
f"global_router:{model['model_info']['id']}:tpm:{current_minute}"
|
||||
)
|
||||
|
||||
## TPM
|
||||
tpm_usage_list: Optional[List] = await self.cache.async_batch_get_cache(
|
||||
keys=tpm_keys
|
||||
)
|
||||
tpm_usage: Optional[int] = None
|
||||
if tpm_usage_list is not None:
|
||||
for t in tpm_usage_list:
|
||||
if isinstance(t, int):
|
||||
if tpm_usage is None:
|
||||
tpm_usage = 0
|
||||
tpm_usage += t
|
||||
|
||||
return tpm_usage
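`deployment_callback_on_success` above writes per-deployment usage under a `global_router:{deployment_id}:tpm:{HH-MM}` key with a 60-second TTL, and `get_model_group_usage` reads it back by summing those keys across every deployment in the group. A compact sketch of that bookkeeping, with a plain dict standing in for the router's cache (illustrative only, not part of the diff):

```python
# Illustrative per-minute TPM bookkeeping (dict stands in for the DualCache).
from datetime import datetime, timezone
from typing import Dict, List

cache: Dict[str, int] = {}


def _minute() -> str:
    return datetime.now(timezone.utc).strftime("%H-%M")


def record_usage(deployment_id: str, total_tokens: int) -> None:
    key = f"global_router:{deployment_id}:tpm:{_minute()}"
    cache[key] = cache.get(key, 0) + total_tokens  # async_increment_cache, ttl=60s


def model_group_usage(deployment_ids: List[str]) -> int:
    keys = [f"global_router:{d}:tpm:{_minute()}" for d in deployment_ids]
    return sum(cache.get(k, 0) for k in keys)  # async_batch_get_cache + sum


record_usage("azure-1", 30)
record_usage("azure-2", 45)
assert model_group_usage(["azure-1", "azure-2"]) == 75
```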
|
||||
|
||||
def get_model_ids(self) -> List[str]:
|
||||
"""
|
||||
Returns list of model id's.
|
||||
|
@ -4916,7 +5047,7 @@ class Router:
|
|||
def reset(self):
|
||||
## clean up on close
|
||||
litellm.success_callback = []
|
||||
litellm.__async_success_callback = []
|
||||
litellm._async_success_callback = []
|
||||
litellm.failure_callback = []
|
||||
litellm._async_failure_callback = []
|
||||
self.retry_policy = None
|
||||
|
|
|
@ -4,6 +4,7 @@ import json
|
|||
import logging
|
||||
import os
|
||||
import sys
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
@ -24,11 +25,21 @@ import pytest
|
|||
def langfuse_client():
|
||||
import langfuse
|
||||
|
||||
_langfuse_cache_key = (
|
||||
f"{os.environ['LANGFUSE_PUBLIC_KEY']}-{os.environ['LANGFUSE_SECRET_KEY']}"
|
||||
)
|
||||
# use a in memory langfuse client for testing, RAM util on ci/cd gets too high when we init many langfuse clients
|
||||
if _langfuse_cache_key in litellm.in_memory_llm_clients_cache:
|
||||
langfuse_client = litellm.in_memory_llm_clients_cache[_langfuse_cache_key]
|
||||
else:
|
||||
langfuse_client = langfuse.Langfuse(
|
||||
public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
|
||||
secret_key=os.environ["LANGFUSE_SECRET_KEY"],
|
||||
host=None,
|
||||
)
|
||||
litellm.in_memory_llm_clients_cache[_langfuse_cache_key] = langfuse_client
|
||||
|
||||
print("NEW LANGFUSE CLIENT")
|
||||
|
||||
with patch(
|
||||
"langfuse.Langfuse", MagicMock(return_value=langfuse_client)
|
||||
|
|
|
@ -1,869 +0,0 @@
|
|||
import asyncio
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
sys.path.insert(0, os.path.abspath("../.."))
|
||||
|
||||
import litellm
|
||||
from litellm import completion
|
||||
|
||||
litellm.num_retries = 3
|
||||
litellm.success_callback = ["langfuse"]
|
||||
os.environ["LANGFUSE_DEBUG"] = "True"
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def langfuse_client():
|
||||
import langfuse
|
||||
|
||||
langfuse_client = langfuse.Langfuse(
|
||||
public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
|
||||
secret_key=os.environ["LANGFUSE_SECRET_KEY"],
|
||||
host=None,
|
||||
)
|
||||
|
||||
with patch(
|
||||
"langfuse.Langfuse", MagicMock(return_value=langfuse_client)
|
||||
) as mock_langfuse_client:
|
||||
yield mock_langfuse_client()
|
||||
|
||||
|
||||
def search_logs(log_file_path, num_good_logs=1):
|
||||
"""
|
||||
Searches the given log file for logs containing the "/api/public" string.
|
||||
|
||||
Parameters:
|
||||
- log_file_path (str): The path to the log file to be searched.
|
||||
|
||||
Returns:
|
||||
- None
|
||||
|
||||
Raises:
|
||||
- Exception: If there are any bad logs found in the log file.
|
||||
"""
|
||||
import re
|
||||
|
||||
print("\n searching logs")
|
||||
bad_logs = []
|
||||
good_logs = []
|
||||
all_logs = []
|
||||
try:
|
||||
with open(log_file_path, "r") as log_file:
|
||||
lines = log_file.readlines()
|
||||
print(f"searching logslines: {lines}")
|
||||
for line in lines:
|
||||
all_logs.append(line.strip())
|
||||
if "/api/public" in line:
|
||||
print("Found log with /api/public:")
|
||||
print(line.strip())
|
||||
print("\n\n")
|
||||
match = re.search(
|
||||
r'"POST /api/public/ingestion HTTP/1.1" (\d+) (\d+)',
|
||||
line,
|
||||
)
|
||||
if match:
|
||||
status_code = int(match.group(1))
|
||||
print("STATUS CODE", status_code)
|
||||
if (
|
||||
status_code != 200
|
||||
and status_code != 201
|
||||
and status_code != 207
|
||||
):
|
||||
print("got a BAD log")
|
||||
bad_logs.append(line.strip())
|
||||
else:
|
||||
good_logs.append(line.strip())
|
||||
print("\nBad Logs")
|
||||
print(bad_logs)
|
||||
if len(bad_logs) > 0:
|
||||
raise Exception(f"bad logs, Bad logs = {bad_logs}")
|
||||
assert (
|
||||
len(good_logs) == num_good_logs
|
||||
), f"Did not get expected number of good logs, expected {num_good_logs}, got {len(good_logs)}. All logs \n {all_logs}"
|
||||
print("\nGood Logs")
|
||||
print(good_logs)
|
||||
if len(good_logs) <= 0:
|
||||
raise Exception(
|
||||
f"There were no Good Logs from Langfuse. No logs with /api/public status 200. \nAll logs:{all_logs}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
||||
def pre_langfuse_setup():
|
||||
"""
|
||||
Set up the logging for the 'pre_langfuse_setup' function.
|
||||
"""
|
||||
# sends logs to langfuse.log
|
||||
import logging
|
||||
|
||||
# Configure the logging to write to a file
|
||||
logging.basicConfig(filename="langfuse.log", level=logging.DEBUG)
|
||||
logger = logging.getLogger()
|
||||
|
||||
# Add a FileHandler to the logger
|
||||
file_handler = logging.FileHandler("langfuse.log", mode="w")
|
||||
file_handler.setLevel(logging.DEBUG)
|
||||
logger.addHandler(file_handler)
|
||||
return
|
||||
|
||||
|
||||
def test_langfuse_logging_async():
|
||||
# this tests time added to make langfuse logging calls, vs just acompletion calls
|
||||
try:
|
||||
pre_langfuse_setup()
|
||||
litellm.set_verbose = True
|
||||
|
||||
# Make 5 calls with an empty success_callback
|
||||
litellm.success_callback = []
|
||||
start_time_empty_callback = asyncio.run(make_async_calls())
|
||||
print("done with no callback test")
|
||||
|
||||
print("starting langfuse test")
|
||||
# Make 5 calls with success_callback set to "langfuse"
|
||||
litellm.success_callback = ["langfuse"]
|
||||
start_time_langfuse = asyncio.run(make_async_calls())
|
||||
print("done with langfuse test")
|
||||
|
||||
# Compare the time for both scenarios
|
||||
print(f"Time taken with success_callback='langfuse': {start_time_langfuse}")
|
||||
print(f"Time taken with empty success_callback: {start_time_empty_callback}")
|
||||
|
||||
# assert the diff is not more than 1 second - this was 5 seconds before the fix
|
||||
assert abs(start_time_langfuse - start_time_empty_callback) < 1
|
||||
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred - {e}")
|
||||
|
||||
|
||||
async def make_async_calls(metadata=None, **completion_kwargs):
|
||||
tasks = []
|
||||
for _ in range(5):
|
||||
tasks.append(create_async_task())
|
||||
|
||||
# Measure the start time before running the tasks
|
||||
start_time = asyncio.get_event_loop().time()
|
||||
|
||||
# Wait for all tasks to complete
|
||||
responses = await asyncio.gather(*tasks)
|
||||
|
||||
# Print the responses when tasks return
|
||||
for idx, response in enumerate(responses):
|
||||
print(f"Response from Task {idx + 1}: {response}")
|
||||
|
||||
# Calculate the total time taken
|
||||
total_time = asyncio.get_event_loop().time() - start_time
|
||||
|
||||
return total_time
|
||||
|
||||
|
||||
def create_async_task(**completion_kwargs):
|
||||
"""
|
||||
Creates an async task for the litellm.acompletion function.
|
||||
This is just the task, but it is not run here.
|
||||
To run the task it must be awaited or used in other asyncio coroutine execution functions like asyncio.gather.
|
||||
Any kwargs passed to this function will be passed to the litellm.acompletion function.
|
||||
By default a standard set of arguments are used for the litellm.acompletion function.
|
||||
"""
|
||||
completion_args = {
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"api_version": "2024-02-01",
|
||||
"messages": [{"role": "user", "content": "This is a test"}],
|
||||
"max_tokens": 5,
|
||||
"temperature": 0.7,
|
||||
"timeout": 5,
|
||||
"user": "langfuse_latency_test_user",
|
||||
"mock_response": "It's simple to use and easy to get started",
|
||||
}
|
||||
completion_args.update(completion_kwargs)
|
||||
return asyncio.create_task(litellm.acompletion(**completion_args))
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("stream", [False, True])
|
||||
async def test_langfuse_logging_without_request_response(stream, langfuse_client):
|
||||
try:
|
||||
import uuid
|
||||
|
||||
_unique_trace_name = f"litellm-test-{str(uuid.uuid4())}"
|
||||
litellm.set_verbose = True
|
||||
litellm.turn_off_message_logging = True
|
||||
litellm.success_callback = ["langfuse"]
|
||||
response = await create_async_task(
|
||||
model="gpt-3.5-turbo",
|
||||
stream=stream,
|
||||
metadata={"trace_id": _unique_trace_name},
|
||||
)
|
||||
print(response)
|
||||
if stream:
|
||||
async for chunk in response:
|
||||
print(chunk)
|
||||
|
||||
langfuse_client.flush()
|
||||
await asyncio.sleep(2)
|
||||
|
||||
# get trace with _unique_trace_name
|
||||
trace = langfuse_client.get_generations(trace_id=_unique_trace_name)
|
||||
|
||||
print("trace_from_langfuse", trace)
|
||||
|
||||
_trace_data = trace.data
|
||||
|
||||
assert _trace_data[0].input == {
|
||||
"messages": [{"content": "redacted-by-litellm", "role": "user"}]
|
||||
}
|
||||
assert _trace_data[0].output == {
|
||||
"role": "assistant",
|
||||
"content": "redacted-by-litellm",
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred - {e}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_langfuse_masked_input_output(langfuse_client):
|
||||
"""
|
||||
Test that creates a trace with masked input and output
|
||||
"""
|
||||
import uuid
|
||||
|
||||
for mask_value in [True, False]:
|
||||
_unique_trace_name = f"litellm-test-{str(uuid.uuid4())}"
|
||||
litellm.set_verbose = True
|
||||
litellm.success_callback = ["langfuse"]
|
||||
response = await create_async_task(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[{"role": "user", "content": "This is a test"}],
|
||||
metadata={
|
||||
"trace_id": _unique_trace_name,
|
||||
"mask_input": mask_value,
|
||||
"mask_output": mask_value,
|
||||
},
|
||||
mock_response="This is a test response",
|
||||
)
|
||||
print(response)
|
||||
expected_input = (
|
||||
"redacted-by-litellm"
|
||||
if mask_value
|
||||
else {"messages": [{"content": "This is a test", "role": "user"}]}
|
||||
)
|
||||
expected_output = (
|
||||
"redacted-by-litellm"
|
||||
if mask_value
|
||||
else {"content": "This is a test response", "role": "assistant"}
|
||||
)
|
||||
langfuse_client.flush()
|
||||
await asyncio.sleep(2)
|
||||
|
||||
# get trace with _unique_trace_name
|
||||
trace = langfuse_client.get_trace(id=_unique_trace_name)
|
||||
generations = list(
|
||||
reversed(langfuse_client.get_generations(trace_id=_unique_trace_name).data)
|
||||
)
|
||||
|
||||
assert trace.input == expected_input
|
||||
assert trace.output == expected_output
|
||||
assert generations[0].input == expected_input
|
||||
assert generations[0].output == expected_output
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_alangfuse_logging_metadata(langfuse_client):
|
||||
"""
|
||||
Test that creates multiple traces, with a varying number of generations and sets various metadata fields
|
||||
Confirms that no metadata that is standard within Langfuse is duplicated in the respective trace or generation metadata
|
||||
For trace continuation certain metadata of the trace is overriden with metadata from the last generation based on the update_trace_keys field
|
||||
Version is set for both the trace and the generation
|
||||
Release is just set for the trace
|
||||
Tags is just set for the trace
|
||||
"""
|
||||
import uuid
|
||||
|
||||
litellm.set_verbose = True
|
||||
litellm.success_callback = ["langfuse"]
|
||||
|
||||
trace_identifiers = {}
|
||||
expected_filtered_metadata_keys = {
|
||||
"trace_name",
|
||||
"trace_id",
|
||||
"existing_trace_id",
|
||||
"trace_user_id",
|
||||
"session_id",
|
||||
"tags",
|
||||
"generation_name",
|
||||
"generation_id",
|
||||
"prompt",
|
||||
}
|
||||
trace_metadata = {
|
||||
"trace_actual_metadata_key": "trace_actual_metadata_value"
|
||||
} # Allows for setting the metadata on the trace
|
||||
run_id = str(uuid.uuid4())
|
||||
session_id = f"litellm-test-session-{run_id}"
|
||||
trace_common_metadata = {
|
||||
"session_id": session_id,
|
||||
"tags": ["litellm-test-tag1", "litellm-test-tag2"],
|
||||
"update_trace_keys": [
|
||||
"output",
|
||||
"trace_metadata",
|
||||
], # Overwrite the following fields in the trace with the last generation's output and the trace_user_id
|
||||
"trace_metadata": trace_metadata,
|
||||
"gen_metadata_key": "gen_metadata_value", # Metadata key that should not be filtered in the generation
|
||||
"trace_release": "litellm-test-release",
|
||||
"version": "litellm-test-version",
|
||||
}
|
||||
for trace_num in range(1, 3): # Two traces
|
||||
metadata = copy.deepcopy(trace_common_metadata)
|
||||
trace_id = f"litellm-test-trace{trace_num}-{run_id}"
|
||||
metadata["trace_id"] = trace_id
|
||||
metadata["trace_name"] = trace_id
|
||||
trace_identifiers[trace_id] = []
|
||||
print(f"Trace: {trace_id}")
|
||||
for generation_num in range(
|
||||
1, trace_num + 1
|
||||
): # Each trace has a number of generations equal to its trace number
|
||||
metadata["trace_user_id"] = f"litellm-test-user{generation_num}-{run_id}"
|
||||
generation_id = (
|
||||
f"litellm-test-trace{trace_num}-generation-{generation_num}-{run_id}"
|
||||
)
|
||||
metadata["generation_id"] = generation_id
|
||||
metadata["generation_name"] = generation_id
|
||||
metadata["trace_metadata"][
|
||||
"generation_id"
|
||||
] = generation_id # Update to test if trace_metadata is overwritten by update trace keys
|
||||
trace_identifiers[trace_id].append(generation_id)
|
||||
print(f"Generation: {generation_id}")
|
||||
response = await create_async_task(
|
||||
model="gpt-3.5-turbo",
|
||||
mock_response=f"{session_id}:{trace_id}:{generation_id}",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"{session_id}:{trace_id}:{generation_id}",
|
||||
}
|
||||
],
|
||||
max_tokens=100,
|
||||
temperature=0.2,
|
||||
metadata=copy.deepcopy(
|
||||
metadata
|
||||
), # Every generation needs its own metadata, langfuse is not async/thread safe without it
|
||||
)
|
||||
print(response)
|
||||
metadata["existing_trace_id"] = trace_id
|
||||
|
||||
langfuse_client.flush()
|
||||
await asyncio.sleep(10)
|
||||
|
||||
# Tests the metadata filtering and the override of the output to be the last generation
|
||||
for trace_id, generation_ids in trace_identifiers.items():
|
||||
trace = langfuse_client.get_trace(id=trace_id)
|
||||
assert trace.id == trace_id
|
||||
assert trace.session_id == session_id
|
||||
assert trace.metadata != trace_metadata
|
||||
generations = list(
|
||||
reversed(langfuse_client.get_generations(trace_id=trace_id).data)
|
||||
)
|
||||
assert len(generations) == len(generation_ids)
|
||||
assert (
|
||||
trace.input == generations[0].input
|
||||
) # Should be set by the first generation
|
||||
assert (
|
||||
trace.output == generations[-1].output
|
||||
) # Should be overwritten by the last generation according to update_trace_keys
|
||||
assert (
|
||||
trace.metadata != generations[-1].metadata
|
||||
) # Should be overwritten by the last generation according to update_trace_keys
|
||||
assert trace.metadata["generation_id"] == generations[-1].id
|
||||
assert set(trace.tags).issuperset(trace_common_metadata["tags"])
|
||||
print("trace_from_langfuse", trace)
|
||||
for generation_id, generation in zip(generation_ids, generations):
|
||||
assert generation.id == generation_id
|
||||
assert generation.trace_id == trace_id
|
||||
print(
|
||||
"common keys in trace",
|
||||
set(generation.metadata.keys()).intersection(
|
||||
expected_filtered_metadata_keys
|
||||
),
|
||||
)
|
||||
|
||||
assert set(generation.metadata.keys()).isdisjoint(
|
||||
expected_filtered_metadata_keys
|
||||
)
|
||||
print("generation_from_langfuse", generation)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="beta test - checking langfuse output")
|
||||
def test_langfuse_logging():
|
||||
try:
|
||||
pre_langfuse_setup()
|
||||
litellm.set_verbose = True
|
||||
response = completion(
|
||||
model="claude-instant-1.2",
|
||||
messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}],
|
||||
max_tokens=10,
|
||||
temperature=0.2,
|
||||
)
|
||||
print(response)
|
||||
# time.sleep(5)
|
||||
# # check langfuse.log to see if there was a failed response
|
||||
# search_logs("langfuse.log")
|
||||
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred - {e}")
|
||||
|
||||
|
||||
# test_langfuse_logging()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="beta test - checking langfuse output")
|
||||
def test_langfuse_logging_stream():
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
response = completion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "this is a streaming test for llama2 + langfuse",
|
||||
}
|
||||
],
|
||||
max_tokens=20,
|
||||
temperature=0.2,
|
||||
stream=True,
|
||||
)
|
||||
print(response)
|
||||
for chunk in response:
|
||||
pass
|
||||
# print(chunk)
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
|
||||
# test_langfuse_logging_stream()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="beta test - checking langfuse output")
|
||||
def test_langfuse_logging_custom_generation_name():
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
response = completion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}],
|
||||
max_tokens=10,
|
||||
metadata={
|
||||
"langfuse/foo": "bar",
|
||||
"langsmith/fizz": "buzz",
|
||||
"prompt_hash": "asdf98u0j9131123",
|
||||
"generation_name": "ishaan-test-generation",
|
||||
"generation_id": "gen-id22",
|
||||
"trace_id": "trace-id22",
|
||||
"trace_user_id": "user-id2",
|
||||
},
|
||||
)
|
||||
print(response)
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred - {e}")
|
||||
print(e)
|
||||
|
||||
|
||||
# test_langfuse_logging_custom_generation_name()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="beta test - checking langfuse output")
|
||||
def test_langfuse_logging_embedding():
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
litellm.success_callback = ["langfuse"]
|
||||
response = litellm.embedding(
|
||||
model="text-embedding-ada-002",
|
||||
input=["gm", "ishaan"],
|
||||
)
|
||||
print(response)
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred - {e}")
|
||||
print(e)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="beta test - checking langfuse output")
|
||||
def test_langfuse_logging_function_calling():
|
||||
litellm.set_verbose = True
|
||||
function1 = [
|
||||
{
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
}
|
||||
]
|
||||
try:
|
||||
response = completion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[{"role": "user", "content": "what's the weather in boston"}],
|
||||
temperature=0.1,
|
||||
functions=function1,
|
||||
)
|
||||
print(response)
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
|
||||
# test_langfuse_logging_function_calling()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Need to address this on main")
|
||||
def test_aaalangfuse_existing_trace_id():
|
||||
"""
|
||||
When existing trace id is passed, don't set trace params -> prevents overwriting the trace
|
||||
|
||||
Pass 1 logging object with a trace
|
||||
|
||||
Pass 2nd logging object with the trace id
|
||||
|
||||
Assert no changes to the trace
|
||||
"""
|
||||
# Test - if the logs were sent to the correct team on langfuse
|
||||
import datetime
|
||||
|
||||
import litellm
|
||||
from litellm.integrations.langfuse import LangFuseLogger
|
||||
|
||||
langfuse_Logger = LangFuseLogger(
|
||||
langfuse_public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
|
||||
langfuse_secret=os.getenv("LANGFUSE_PROJECT2_SECRET"),
|
||||
)
|
||||
litellm.success_callback = ["langfuse"]
|
||||
|
||||
# langfuse_args = {'kwargs': { 'start_time': 'end_time': datetime.datetime(2024, 5, 1, 7, 31, 29, 903685), 'user_id': None, 'print_verbose': <function print_verbose at 0x109d1f420>, 'level': 'DEFAULT', 'status_message': None}
|
||||
response_obj = litellm.ModelResponse(
|
||||
id="chatcmpl-9K5HUAbVRqFrMZKXL0WoC295xhguY",
|
||||
choices=[
|
||||
litellm.Choices(
|
||||
finish_reason="stop",
|
||||
index=0,
|
||||
message=litellm.Message(
|
||||
content="I'm sorry, I am an AI assistant and do not have real-time information. I recommend checking a reliable weather website or app for the most up-to-date weather information in Boston.",
|
||||
role="assistant",
|
||||
),
|
||||
)
|
||||
],
|
||||
created=1714573888,
|
||||
model="gpt-3.5-turbo-0125",
|
||||
object="chat.completion",
|
||||
system_fingerprint="fp_3b956da36b",
|
||||
usage=litellm.Usage(completion_tokens=37, prompt_tokens=14, total_tokens=51),
|
||||
)
|
||||
|
||||
### NEW TRACE ###
|
||||
message = [{"role": "user", "content": "what's the weather in boston"}]
|
||||
langfuse_args = {
|
||||
"response_obj": response_obj,
|
||||
"kwargs": {
|
||||
"model": "gpt-3.5-turbo",
|
||||
"litellm_params": {
|
||||
"acompletion": False,
|
||||
"api_key": None,
|
||||
"force_timeout": 600,
|
||||
"logger_fn": None,
|
||||
"verbose": False,
|
||||
"custom_llm_provider": "openai",
|
||||
"api_base": "https://api.openai.com/v1/",
|
||||
"litellm_call_id": None,
|
||||
"model_alias_map": {},
|
||||
"completion_call_id": None,
|
||||
"metadata": None,
|
||||
"model_info": None,
|
||||
"proxy_server_request": None,
|
||||
"preset_cache_key": None,
|
||||
"no-log": False,
|
||||
"stream_response": {},
|
||||
},
|
||||
"messages": message,
|
||||
"optional_params": {"temperature": 0.1, "extra_body": {}},
|
||||
"start_time": "2024-05-01 07:31:27.986164",
|
||||
"stream": False,
|
||||
"user": None,
|
||||
"call_type": "completion",
|
||||
"litellm_call_id": None,
|
||||
"completion_start_time": "2024-05-01 07:31:29.903685",
|
||||
"temperature": 0.1,
|
||||
"extra_body": {},
|
||||
"input": [{"role": "user", "content": "what's the weather in boston"}],
|
||||
"api_key": "my-api-key",
|
||||
"additional_args": {
|
||||
"complete_input_dict": {
|
||||
"model": "gpt-3.5-turbo",
|
||||
"messages": [
|
||||
{"role": "user", "content": "what's the weather in boston"}
|
||||
],
|
||||
"temperature": 0.1,
|
||||
"extra_body": {},
|
||||
}
|
||||
},
|
||||
"log_event_type": "successful_api_call",
|
||||
"end_time": "2024-05-01 07:31:29.903685",
|
||||
"cache_hit": None,
|
||||
"response_cost": 6.25e-05,
|
||||
},
|
||||
"start_time": datetime.datetime(2024, 5, 1, 7, 31, 27, 986164),
|
||||
"end_time": datetime.datetime(2024, 5, 1, 7, 31, 29, 903685),
|
||||
"user_id": None,
|
||||
"print_verbose": litellm.print_verbose,
|
||||
"level": "DEFAULT",
|
||||
"status_message": None,
|
||||
}
|
||||
|
||||
langfuse_response_object = langfuse_Logger.log_event(**langfuse_args)
|
||||
|
||||
import langfuse
|
||||
|
||||
langfuse_client = langfuse.Langfuse(
|
||||
public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
|
||||
secret_key=os.getenv("LANGFUSE_PROJECT2_SECRET"),
|
||||
)
|
||||
|
||||
trace_id = langfuse_response_object["trace_id"]
|
||||
|
||||
assert trace_id is not None
|
||||
|
||||
langfuse_client.flush()
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
print(langfuse_client.get_trace(id=trace_id))
|
||||
|
||||
initial_langfuse_trace = langfuse_client.get_trace(id=trace_id)
|
||||
|
||||
### EXISTING TRACE ###
|
||||
|
||||
new_metadata = {"existing_trace_id": trace_id}
|
||||
new_messages = [{"role": "user", "content": "What do you know?"}]
|
||||
new_response_obj = litellm.ModelResponse(
|
||||
id="chatcmpl-9K5HUAbVRqFrMZKXL0WoC295xhguY",
|
||||
choices=[
|
||||
litellm.Choices(
|
||||
finish_reason="stop",
|
||||
index=0,
|
||||
message=litellm.Message(
|
||||
content="What do I know?",
|
||||
role="assistant",
|
||||
),
|
||||
)
|
||||
],
|
||||
created=1714573888,
|
||||
model="gpt-3.5-turbo-0125",
|
||||
object="chat.completion",
|
||||
system_fingerprint="fp_3b956da36b",
|
||||
usage=litellm.Usage(completion_tokens=37, prompt_tokens=14, total_tokens=51),
|
||||
)
|
||||
langfuse_args = {
|
||||
"response_obj": new_response_obj,
|
||||
"kwargs": {
|
||||
"model": "gpt-3.5-turbo",
|
||||
"litellm_params": {
|
||||
"acompletion": False,
|
||||
"api_key": None,
|
||||
"force_timeout": 600,
|
||||
"logger_fn": None,
|
||||
"verbose": False,
|
||||
"custom_llm_provider": "openai",
|
||||
"api_base": "https://api.openai.com/v1/",
|
||||
"litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
|
||||
"model_alias_map": {},
|
||||
"completion_call_id": None,
|
||||
"metadata": new_metadata,
|
||||
"model_info": None,
|
||||
"proxy_server_request": None,
|
||||
"preset_cache_key": None,
|
||||
"no-log": False,
|
||||
"stream_response": {},
|
||||
},
|
||||
"messages": new_messages,
|
||||
"optional_params": {"temperature": 0.1, "extra_body": {}},
|
||||
"start_time": "2024-05-01 07:31:27.986164",
|
||||
"stream": False,
|
||||
"user": None,
|
||||
"call_type": "completion",
|
||||
"litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
|
||||
"completion_start_time": "2024-05-01 07:31:29.903685",
|
||||
"temperature": 0.1,
|
||||
"extra_body": {},
|
||||
"input": [{"role": "user", "content": "what's the weather in boston"}],
|
||||
"api_key": "my-api-key",
|
||||
"additional_args": {
|
||||
"complete_input_dict": {
|
||||
"model": "gpt-3.5-turbo",
|
||||
"messages": [
|
||||
{"role": "user", "content": "what's the weather in boston"}
|
||||
],
|
||||
"temperature": 0.1,
|
||||
"extra_body": {},
|
||||
}
|
||||
},
|
||||
"log_event_type": "successful_api_call",
|
||||
"end_time": "2024-05-01 07:31:29.903685",
|
||||
"cache_hit": None,
|
||||
"response_cost": 6.25e-05,
|
||||
},
|
||||
"start_time": datetime.datetime(2024, 5, 1, 7, 31, 27, 986164),
|
||||
"end_time": datetime.datetime(2024, 5, 1, 7, 31, 29, 903685),
|
||||
"user_id": None,
|
||||
"print_verbose": litellm.print_verbose,
|
||||
"level": "DEFAULT",
|
||||
"status_message": None,
|
||||
}
|
||||
|
||||
langfuse_response_object = langfuse_Logger.log_event(**langfuse_args)
|
||||
|
||||
new_trace_id = langfuse_response_object["trace_id"]
|
||||
|
||||
assert new_trace_id == trace_id
|
||||
|
||||
langfuse_client.flush()
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
print(langfuse_client.get_trace(id=trace_id))
|
||||
|
||||
new_langfuse_trace = langfuse_client.get_trace(id=trace_id)
|
||||
|
||||
initial_langfuse_trace_dict = dict(initial_langfuse_trace)
|
||||
initial_langfuse_trace_dict.pop("updatedAt")
|
||||
initial_langfuse_trace_dict.pop("timestamp")
|
||||
|
||||
new_langfuse_trace_dict = dict(new_langfuse_trace)
|
||||
new_langfuse_trace_dict.pop("updatedAt")
|
||||
new_langfuse_trace_dict.pop("timestamp")
|
||||
|
||||
assert initial_langfuse_trace_dict == new_langfuse_trace_dict
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
condition=not os.environ.get("OPENAI_API_KEY", False),
|
||||
reason="Authentication missing for openai",
|
||||
)
|
||||
def test_langfuse_logging_tool_calling():
|
||||
litellm.set_verbose = True
|
||||
|
||||
def get_current_weather(location, unit="fahrenheit"):
|
||||
"""Get the current weather in a given location"""
|
||||
if "tokyo" in location.lower():
|
||||
return json.dumps(
|
||||
{"location": "Tokyo", "temperature": "10", "unit": "celsius"}
|
||||
)
|
||||
elif "san francisco" in location.lower():
|
||||
return json.dumps(
|
||||
{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}
|
||||
)
|
||||
elif "paris" in location.lower():
|
||||
return json.dumps(
|
||||
{"location": "Paris", "temperature": "22", "unit": "celsius"}
|
||||
)
|
||||
else:
|
||||
return json.dumps({"location": location, "temperature": "unknown"})
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the weather like in San Francisco, Tokyo, and Paris?",
|
||||
}
|
||||
]
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
response = litellm.completion(
|
||||
model="gpt-3.5-turbo-1106",
|
||||
messages=messages,
|
||||
tools=tools,
|
||||
tool_choice="auto", # auto is default, but we'll be explicit
|
||||
)
|
||||
print("\nLLM Response1:\n", response)
|
||||
response_message = response.choices[0].message
|
||||
tool_calls = response.choices[0].message.tool_calls
|
||||
|
||||
|
||||
# test_langfuse_logging_tool_calling()
|
||||
|
||||
|
||||
def get_langfuse_prompt(name: str):
|
||||
import langfuse
|
||||
from langfuse import Langfuse
|
||||
|
||||
try:
|
||||
langfuse = Langfuse(
|
||||
public_key=os.environ["LANGFUSE_DEV_PUBLIC_KEY"],
|
||||
secret_key=os.environ["LANGFUSE_DEV_SK_KEY"],
|
||||
host=os.environ["LANGFUSE_HOST"],
|
||||
)
|
||||
|
||||
# Get current production version of a text prompt
|
||||
prompt = langfuse.get_prompt(name=name)
|
||||
return prompt
|
||||
except Exception as e:
|
||||
raise Exception(f"Error getting prompt: {e}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip(
|
||||
reason="local only test, use this to verify if we can send request to litellm proxy server"
|
||||
)
|
||||
async def test_make_request():
|
||||
response = await litellm.acompletion(
|
||||
model="openai/llama3",
|
||||
api_key="sk-1234",
|
||||
base_url="http://localhost:4000",
|
||||
messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}],
|
||||
extra_body={
|
||||
"metadata": {
|
||||
"tags": ["openai"],
|
||||
"prompt": get_langfuse_prompt("test-chat"),
|
||||
}
|
||||
},
|
||||
)
|
486
litellm/tests/test_dynamic_rate_limit_handler.py
Normal file
|
@ -0,0 +1,486 @@
|
|||
# What is this?
|
||||
## Unit tests for 'dynamic_rate_limiter.py'
|
||||
import asyncio
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
import os
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
import pytest
|
||||
|
||||
import litellm
|
||||
from litellm import DualCache, Router
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.hooks.dynamic_rate_limiter import (
|
||||
_PROXY_DynamicRateLimitHandler as DynamicRateLimitHandler,
|
||||
)
|
||||
|
||||
"""
|
||||
Basic test cases:
|
||||
|
||||
- If 1 'active' project => give all tpm
|
||||
- If 2 'active' projects => divide tpm in 2
|
||||
"""
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def dynamic_rate_limit_handler() -> DynamicRateLimitHandler:
|
||||
internal_cache = DualCache()
|
||||
return DynamicRateLimitHandler(internal_usage_cache=internal_cache)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_response() -> litellm.ModelResponse:
|
||||
return litellm.ModelResponse(
|
||||
**{
|
||||
"id": "chatcmpl-abc123",
|
||||
"object": "chat.completion",
|
||||
"created": 1699896916,
|
||||
"model": "gpt-3.5-turbo-0125",
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_abc123",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_current_weather",
|
||||
"arguments": '{\n"location": "Boston, MA"\n}',
|
||||
},
|
||||
}
|
||||
],
|
||||
},
|
||||
"logprobs": None,
|
||||
"finish_reason": "tool_calls",
|
||||
}
|
||||
],
|
||||
"usage": {"prompt_tokens": 5, "completion_tokens": 5, "total_tokens": 10},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def user_api_key_auth() -> UserAPIKeyAuth:
|
||||
return UserAPIKeyAuth()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("num_projects", [1, 2, 100])
|
||||
@pytest.mark.asyncio
|
||||
async def test_available_tpm(num_projects, dynamic_rate_limit_handler):
|
||||
model = "my-fake-model"
|
||||
## SET CACHE W/ ACTIVE PROJECTS
|
||||
projects = [str(uuid.uuid4()) for _ in range(num_projects)]
|
||||
|
||||
await dynamic_rate_limit_handler.internal_usage_cache.async_set_cache_sadd(
|
||||
model=model, value=projects
|
||||
)
|
||||
|
||||
model_tpm = 100
|
||||
llm_router = Router(
|
||||
model_list=[
|
||||
{
|
||||
"model_name": model,
|
||||
"litellm_params": {
|
||||
"model": "gpt-3.5-turbo",
|
||||
"api_key": "my-key",
|
||||
"api_base": "my-base",
|
||||
"tpm": model_tpm,
|
||||
},
|
||||
}
|
||||
]
|
||||
)
|
||||
dynamic_rate_limit_handler.update_variables(llm_router=llm_router)
|
||||
|
||||
## CHECK AVAILABLE TPM PER PROJECT
|
||||
|
||||
availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
|
||||
model=model
|
||||
)
|
||||
|
||||
expected_availability = int(model_tpm / num_projects)
|
||||
|
||||
assert availability == expected_availability
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_rate_limit_raised(dynamic_rate_limit_handler, user_api_key_auth):
|
||||
"""
|
||||
Unit test. Tests that a rate limit error is raised when the quota is exhausted.
|
||||
"""
|
||||
from fastapi import HTTPException
|
||||
|
||||
model = "my-fake-model"
|
||||
## SET CACHE W/ ACTIVE PROJECTS
|
||||
projects = [str(uuid.uuid4())]
|
||||
|
||||
await dynamic_rate_limit_handler.internal_usage_cache.async_set_cache_sadd(
|
||||
model=model, value=projects
|
||||
)
|
||||
|
||||
model_tpm = 0
|
||||
llm_router = Router(
|
||||
model_list=[
|
||||
{
|
||||
"model_name": model,
|
||||
"litellm_params": {
|
||||
"model": "gpt-3.5-turbo",
|
||||
"api_key": "my-key",
|
||||
"api_base": "my-base",
|
||||
"tpm": model_tpm,
|
||||
},
|
||||
}
|
||||
]
|
||||
)
|
||||
dynamic_rate_limit_handler.update_variables(llm_router=llm_router)
|
||||
|
||||
## CHECK AVAILABLE TPM PER PROJECT
|
||||
|
||||
availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
|
||||
model=model
|
||||
)
|
||||
|
||||
expected_availability = int(model_tpm / 1)
|
||||
|
||||
assert availability == expected_availability
|
||||
|
||||
## CHECK if exception raised
|
||||
|
||||
try:
|
||||
await dynamic_rate_limit_handler.async_pre_call_hook(
|
||||
user_api_key_dict=user_api_key_auth,
|
||||
cache=DualCache(),
|
||||
data={"model": model},
|
||||
call_type="completion",
|
||||
)
|
||||
pytest.fail("Expected this to raise HTTPexception")
|
||||
except HTTPException as e:
|
||||
assert e.status_code == 429 # check if rate limit error raised
|
||||
pass
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_base_case(dynamic_rate_limit_handler, mock_response):
|
||||
"""
|
||||
If just 1 active project
|
||||
|
||||
it should get all the quota
|
||||
|
||||
- allow request to go through
|
||||
- update token usage
|
||||
- exhaust all tpm with just 1 project
|
||||
- assert RateLimitError raised at 100%+1 tpm
|
||||
"""
|
||||
model = "my-fake-model"
|
||||
## model tpm - 50
|
||||
model_tpm = 50
|
||||
## tpm per request - 10
|
||||
setattr(
|
||||
mock_response,
|
||||
"usage",
|
||||
litellm.Usage(prompt_tokens=5, completion_tokens=5, total_tokens=10),
|
||||
)
|
||||
|
||||
llm_router = Router(
|
||||
model_list=[
|
||||
{
|
||||
"model_name": model,
|
||||
"litellm_params": {
|
||||
"model": "gpt-3.5-turbo",
|
||||
"api_key": "my-key",
|
||||
"api_base": "my-base",
|
||||
"tpm": model_tpm,
|
||||
"mock_response": mock_response,
|
||||
},
|
||||
}
|
||||
]
|
||||
)
|
||||
dynamic_rate_limit_handler.update_variables(llm_router=llm_router)
|
||||
|
||||
prev_availability: Optional[int] = None
|
||||
allowed_fails = 1
|
||||
for _ in range(5):
|
||||
try:
|
||||
# check availability
|
||||
availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
|
||||
model=model
|
||||
)
|
||||
|
||||
## assert availability updated
|
||||
if prev_availability is not None and availability is not None:
|
||||
assert availability == prev_availability - 10
|
||||
|
||||
print(
|
||||
"prev_availability={}, availability={}".format(
|
||||
prev_availability, availability
|
||||
)
|
||||
)
|
||||
|
||||
prev_availability = availability
|
||||
|
||||
# make call
|
||||
await llm_router.acompletion(
|
||||
model=model, messages=[{"role": "user", "content": "hey!"}]
|
||||
)
|
||||
|
||||
await asyncio.sleep(3)
|
||||
except Exception:
|
||||
if allowed_fails > 0:
|
||||
allowed_fails -= 1
|
||||
else:
|
||||
raise
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_update_cache(
|
||||
dynamic_rate_limit_handler, mock_response, user_api_key_auth
|
||||
):
|
||||
"""
|
||||
Check if the active project count is correctly updated
|
||||
"""
|
||||
model = "my-fake-model"
|
||||
model_tpm = 50
|
||||
|
||||
llm_router = Router(
|
||||
model_list=[
|
||||
{
|
||||
"model_name": model,
|
||||
"litellm_params": {
|
||||
"model": "gpt-3.5-turbo",
|
||||
"api_key": "my-key",
|
||||
"api_base": "my-base",
|
||||
"tpm": model_tpm,
|
||||
"mock_response": mock_response,
|
||||
},
|
||||
}
|
||||
]
|
||||
)
|
||||
dynamic_rate_limit_handler.update_variables(llm_router=llm_router)
|
||||
|
||||
## INITIAL ACTIVE PROJECTS - ASSERT NONE
|
||||
_, _, active_projects = await dynamic_rate_limit_handler.check_available_tpm(
|
||||
model=model
|
||||
)
|
||||
|
||||
assert active_projects is None
|
||||
|
||||
## MAKE CALL
|
||||
await dynamic_rate_limit_handler.async_pre_call_hook(
|
||||
user_api_key_dict=user_api_key_auth,
|
||||
cache=DualCache(),
|
||||
data={"model": model},
|
||||
call_type="completion",
|
||||
)
|
||||
|
||||
await asyncio.sleep(2)
|
||||
## INITIAL ACTIVE PROJECTS - ASSERT 1
|
||||
_, _, active_projects = await dynamic_rate_limit_handler.check_available_tpm(
|
||||
model=model
|
||||
)
|
||||
|
||||
assert active_projects == 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("num_projects", [2])
|
||||
@pytest.mark.asyncio
|
||||
async def test_multiple_projects(
|
||||
dynamic_rate_limit_handler, mock_response, num_projects
|
||||
):
|
||||
"""
|
||||
If 2 active projects
|
||||
|
||||
it should split 50% each
|
||||
|
||||
- assert available tpm is 0 after 50%+1 tpm calls
|
||||
"""
|
||||
model = "my-fake-model"
|
||||
model_tpm = 50
|
||||
total_tokens_per_call = 10
|
||||
step_tokens_per_call_per_project = total_tokens_per_call / num_projects
|
||||
|
||||
available_tpm_per_project = int(model_tpm / num_projects)
|
||||
|
||||
## SET CACHE W/ ACTIVE PROJECTS
|
||||
projects = [str(uuid.uuid4()) for _ in range(num_projects)]
|
||||
await dynamic_rate_limit_handler.internal_usage_cache.async_set_cache_sadd(
|
||||
model=model, value=projects
|
||||
)
|
||||
|
||||
expected_runs = int(available_tpm_per_project / step_tokens_per_call_per_project)
|
||||
|
||||
setattr(
|
||||
mock_response,
|
||||
"usage",
|
||||
litellm.Usage(
|
||||
prompt_tokens=5, completion_tokens=5, total_tokens=total_tokens_per_call
|
||||
),
|
||||
)
|
||||
|
||||
llm_router = Router(
|
||||
model_list=[
|
||||
{
|
||||
"model_name": model,
|
||||
"litellm_params": {
|
||||
"model": "gpt-3.5-turbo",
|
||||
"api_key": "my-key",
|
||||
"api_base": "my-base",
|
||||
"tpm": model_tpm,
|
||||
"mock_response": mock_response,
|
||||
},
|
||||
}
|
||||
]
|
||||
)
|
||||
dynamic_rate_limit_handler.update_variables(llm_router=llm_router)
|
||||
|
||||
prev_availability: Optional[int] = None
|
||||
|
||||
print("expected_runs: {}".format(expected_runs))
|
||||
for i in range(expected_runs + 1):
|
||||
# check availability
|
||||
availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
|
||||
model=model
|
||||
)
|
||||
|
||||
## assert availability updated
|
||||
if prev_availability is not None and availability is not None:
|
||||
assert (
|
||||
availability == prev_availability - step_tokens_per_call_per_project
|
||||
), "Current Availability: Got={}, Expected={}, Step={}, Tokens per step={}, Initial model tpm={}".format(
|
||||
availability,
|
||||
prev_availability - 10,
|
||||
i,
|
||||
step_tokens_per_call_per_project,
|
||||
model_tpm,
|
||||
)
|
||||
|
||||
print(
|
||||
"prev_availability={}, availability={}".format(
|
||||
prev_availability, availability
|
||||
)
|
||||
)
|
||||
|
||||
prev_availability = availability
|
||||
|
||||
# make call
|
||||
await llm_router.acompletion(
|
||||
model=model, messages=[{"role": "user", "content": "hey!"}]
|
||||
)
|
||||
|
||||
await asyncio.sleep(3)
|
||||
|
||||
# check availability
|
||||
availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
|
||||
model=model
|
||||
)
|
||||
assert availability == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize("num_projects", [2])
|
||||
@pytest.mark.asyncio
|
||||
async def test_multiple_projects_e2e(
|
||||
dynamic_rate_limit_handler, mock_response, num_projects
|
||||
):
|
||||
"""
|
||||
2 parallel calls with different keys, same model
|
||||
|
||||
If 2 active projects
|
||||
|
||||
it should split 50% each
|
||||
|
||||
- assert available tpm is 0 after 50%+1 tpm calls
|
||||
"""
|
||||
model = "my-fake-model"
|
||||
model_tpm = 50
|
||||
total_tokens_per_call = 10
|
||||
step_tokens_per_call_per_project = total_tokens_per_call / num_projects
|
||||
|
||||
available_tpm_per_project = int(model_tpm / num_projects)
|
||||
|
||||
## SET CACHE W/ ACTIVE PROJECTS
|
||||
projects = [str(uuid.uuid4()) for _ in range(num_projects)]
|
||||
await dynamic_rate_limit_handler.internal_usage_cache.async_set_cache_sadd(
|
||||
model=model, value=projects
|
||||
)
|
||||
|
||||
expected_runs = int(available_tpm_per_project / step_tokens_per_call_per_project)
|
||||
|
||||
setattr(
|
||||
mock_response,
|
||||
"usage",
|
||||
litellm.Usage(
|
||||
prompt_tokens=5, completion_tokens=5, total_tokens=total_tokens_per_call
|
||||
),
|
||||
)
|
||||
|
||||
llm_router = Router(
|
||||
model_list=[
|
||||
{
|
||||
"model_name": model,
|
||||
"litellm_params": {
|
||||
"model": "gpt-3.5-turbo",
|
||||
"api_key": "my-key",
|
||||
"api_base": "my-base",
|
||||
"tpm": model_tpm,
|
||||
"mock_response": mock_response,
|
||||
},
|
||||
}
|
||||
]
|
||||
)
|
||||
dynamic_rate_limit_handler.update_variables(llm_router=llm_router)
|
||||
|
||||
prev_availability: Optional[int] = None
|
||||
|
||||
print("expected_runs: {}".format(expected_runs))
|
||||
for i in range(expected_runs + 1):
|
||||
# check availability
|
||||
availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
|
||||
model=model
|
||||
)
|
||||
|
||||
## assert availability updated
|
||||
if prev_availability is not None and availability is not None:
|
||||
assert (
|
||||
availability == prev_availability - step_tokens_per_call_per_project
|
||||
), "Current Availability: Got={}, Expected={}, Step={}, Tokens per step={}, Initial model tpm={}".format(
|
||||
availability,
|
||||
prev_availability - 10,
|
||||
i,
|
||||
step_tokens_per_call_per_project,
|
||||
model_tpm,
|
||||
)
|
||||
|
||||
print(
|
||||
"prev_availability={}, availability={}".format(
|
||||
prev_availability, availability
|
||||
)
|
||||
)
|
||||
|
||||
prev_availability = availability
|
||||
|
||||
# make call
|
||||
await llm_router.acompletion(
|
||||
model=model, messages=[{"role": "user", "content": "hey!"}]
|
||||
)
|
||||
|
||||
await asyncio.sleep(3)
|
||||
|
||||
# check availability
|
||||
availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
|
||||
model=model
|
||||
)
|
||||
assert availability == 0
|
52
litellm/tests/test_proxy_routes.py
Normal file
|
@ -0,0 +1,52 @@
|
|||
import os
|
||||
import sys
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
import io
|
||||
import os
|
||||
|
||||
# this file is to test litellm/proxy
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
import pytest
|
||||
|
||||
import litellm
|
||||
from litellm.proxy._types import LiteLLMRoutes
|
||||
from litellm.proxy.proxy_server import router
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG, # Set the desired logging level
|
||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||
)
|
||||
|
||||
|
||||
def test_routes_on_litellm_proxy():
|
||||
"""
|
||||
Goal of this test: ensure all the critical OpenAI routes are registered on the proxy server's FastAPI router
|
||||
|
||||
|
||||
this prevents accidentally deleting /threads, /batches, etc.
|
||||
"""
|
||||
_all_routes = []
|
||||
for route in router.routes:
|
||||
|
||||
_path_as_str = str(route.path)
|
||||
if ":path" in _path_as_str:
|
||||
# remove the :path
|
||||
_path_as_str = _path_as_str.replace(":path", "")
|
||||
_all_routes.append(_path_as_str)
|
||||
|
||||
print("ALL ROUTES on LiteLLM Proxy:", _all_routes)
|
||||
print("\n\n")
|
||||
print("ALL OPENAI ROUTES:", LiteLLMRoutes.openai_routes.value)
|
||||
|
||||
for route in LiteLLMRoutes.openai_routes.value:
|
||||
assert route in _all_routes
|
|
@ -1730,3 +1730,99 @@ async def test_router_text_completion_client():
|
|||
print(responses)
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_response() -> litellm.ModelResponse:
|
||||
return litellm.ModelResponse(
|
||||
**{
|
||||
"id": "chatcmpl-abc123",
|
||||
"object": "chat.completion",
|
||||
"created": 1699896916,
|
||||
"model": "gpt-3.5-turbo-0125",
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_abc123",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_current_weather",
|
||||
"arguments": '{\n"location": "Boston, MA"\n}',
|
||||
},
|
||||
}
|
||||
],
|
||||
},
|
||||
"logprobs": None,
|
||||
"finish_reason": "tool_calls",
|
||||
}
|
||||
],
|
||||
"usage": {"prompt_tokens": 5, "completion_tokens": 5, "total_tokens": 10},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_router_model_usage(mock_response):
|
||||
"""
|
||||
Test that tracking of used model TPM works as expected
|
||||
"""
|
||||
model = "my-fake-model"
|
||||
model_tpm = 100
|
||||
setattr(
|
||||
mock_response,
|
||||
"usage",
|
||||
litellm.Usage(prompt_tokens=5, completion_tokens=5, total_tokens=10),
|
||||
)
|
||||
|
||||
print(f"mock_response: {mock_response}")
|
||||
model_tpm = 100
|
||||
llm_router = Router(
|
||||
model_list=[
|
||||
{
|
||||
"model_name": model,
|
||||
"litellm_params": {
|
||||
"model": "gpt-3.5-turbo",
|
||||
"api_key": "my-key",
|
||||
"api_base": "my-base",
|
||||
"tpm": model_tpm,
|
||||
"mock_response": mock_response,
|
||||
},
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
allowed_fails = 1  # allow one failure, e.g. if the minute window changes between calls
|
||||
|
||||
for _ in range(2):
|
||||
try:
|
||||
_ = await llm_router.acompletion(
|
||||
model=model, messages=[{"role": "user", "content": "Hey!"}]
|
||||
)
|
||||
await asyncio.sleep(3)
|
||||
|
||||
initial_usage = await llm_router.get_model_group_usage(model_group=model)
|
||||
|
||||
# completion call - 10 tokens
|
||||
_ = await llm_router.acompletion(
|
||||
model=model, messages=[{"role": "user", "content": "Hey!"}]
|
||||
)
|
||||
|
||||
await asyncio.sleep(3)
|
||||
updated_usage = await llm_router.get_model_group_usage(model_group=model)
|
||||
|
||||
assert updated_usage == initial_usage + 10 # type: ignore
|
||||
break
|
||||
except Exception as e:
|
||||
if allowed_fails > 0:
|
||||
print(
|
||||
f"Decrementing allowed_fails: {allowed_fails}.\nReceived error - {str(e)}"
|
||||
)
|
||||
allowed_fails -= 1
|
||||
else:
|
||||
print(f"allowed_fails: {allowed_fails}")
|
||||
raise e
|
||||
|
|
|
@ -742,7 +742,9 @@ def test_completion_palm_stream():
|
|||
# test_completion_palm_stream()
|
||||
|
||||
|
||||
def test_completion_gemini_stream():
|
||||
@pytest.mark.parametrize("sync_mode", [False]) # True,
|
||||
@pytest.mark.asyncio
|
||||
async def test_completion_gemini_stream(sync_mode):
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
print("Streaming gemini response")
|
||||
|
@ -750,29 +752,58 @@ def test_completion_gemini_stream():
|
|||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "how does a court case get to the Supreme Court?",
|
||||
"content": "Who was Alexander?",
|
||||
},
|
||||
]
|
||||
print("testing gemini streaming")
|
||||
response = completion(model="gemini/gemini-pro", messages=messages, stream=True)
|
||||
print(f"type of response at the top: {response}")
|
||||
complete_response = ""
|
||||
# Add any assertions here to check the response
|
||||
non_empty_chunks = 0
|
||||
|
||||
if sync_mode:
|
||||
response = completion(
|
||||
model="gemini/gemini-1.5-flash",
|
||||
messages=messages,
|
||||
stream=True,
|
||||
)
|
||||
|
||||
for idx, chunk in enumerate(response):
|
||||
print(chunk)
|
||||
# print(chunk.choices[0].delta)
|
||||
chunk, finished = streaming_format_tests(idx, chunk)
|
||||
if finished:
|
||||
break
|
||||
non_empty_chunks += 1
|
||||
complete_response += chunk
|
||||
else:
|
||||
response = await litellm.acompletion(
|
||||
model="gemini/gemini-1.5-flash",
|
||||
messages=messages,
|
||||
stream=True,
|
||||
)
|
||||
|
||||
idx = 0
|
||||
async for chunk in response:
|
||||
print(chunk)
|
||||
# print(chunk.choices[0].delta)
|
||||
chunk, finished = streaming_format_tests(idx, chunk)
|
||||
if finished:
|
||||
break
|
||||
non_empty_chunks += 1
|
||||
complete_response += chunk
|
||||
idx += 1
|
||||
|
||||
if complete_response.strip() == "":
|
||||
raise Exception("Empty response received")
|
||||
print(f"completion_response: {complete_response}")
|
||||
except litellm.APIError as e:
|
||||
assert non_empty_chunks > 1
|
||||
except litellm.InternalServerError as e:
|
||||
pass
|
||||
except litellm.RateLimitError as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
if "429 Resource has been exhausted":
|
||||
return
|
||||
# if "429 Resource has been exhausted":
|
||||
# return
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
|
|
|
@ -443,6 +443,8 @@ class ModelGroupInfo(BaseModel):
|
|||
"chat", "embedding", "completion", "image_generation", "audio_transcription"
|
||||
]
|
||||
] = Field(default="chat")
|
||||
tpm: Optional[int] = None
|
||||
rpm: Optional[int] = None
|
||||
supports_parallel_function_calling: bool = Field(default=False)
|
||||
supports_vision: bool = Field(default=False)
|
||||
supports_function_calling: bool = Field(default=False)
|
||||
|
|
|
@ -340,14 +340,15 @@ def function_setup(
|
|||
)
|
||||
try:
|
||||
global callback_list, add_breadcrumb, user_logger_fn, Logging
|
||||
|
||||
function_id = kwargs["id"] if "id" in kwargs else None
|
||||
|
||||
if len(litellm.callbacks) > 0:
|
||||
for callback in litellm.callbacks:
|
||||
# check if callback is a string - e.g. "lago", "openmeter"
|
||||
if isinstance(callback, str):
|
||||
callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(
|
||||
callback
|
||||
callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class( # type: ignore
|
||||
callback, internal_usage_cache=None, llm_router=None
|
||||
)
|
||||
if any(
|
||||
isinstance(cb, type(callback))
|
||||
|
@ -3895,12 +3896,16 @@ def get_formatted_prompt(
|
|||
|
||||
|
||||
def get_response_string(response_obj: ModelResponse) -> str:
|
||||
_choices: List[Choices] = response_obj.choices # type: ignore
|
||||
_choices: List[Union[Choices, StreamingChoices]] = response_obj.choices
|
||||
|
||||
response_str = ""
|
||||
for choice in _choices:
|
||||
if isinstance(choice, Choices):
|
||||
if choice.message.content is not None:
|
||||
response_str += choice.message.content
|
||||
elif isinstance(choice, StreamingChoices):
|
||||
if choice.delta.content is not None:
|
||||
response_str += choice.delta.content
|
||||
|
||||
return response_str
|
||||
|
||||
|
@ -9590,6 +9595,11 @@ class CustomStreamWrapper:
|
|||
litellm.request_timeout
|
||||
)
|
||||
if self.logging_obj is not None:
|
||||
## LOGGING
|
||||
threading.Thread(
|
||||
target=self.logging_obj.failure_handler,
|
||||
args=(e, traceback_exception),
|
||||
).start() # log response
|
||||
# Handle any exceptions that might occur during streaming
|
||||
asyncio.create_task(
|
||||
self.logging_obj.async_failure_handler(e, traceback_exception)
|
||||
|
@ -9597,11 +9607,24 @@ class CustomStreamWrapper:
|
|||
raise e
|
||||
except Exception as e:
|
||||
traceback_exception = traceback.format_exc()
|
||||
if self.logging_obj is not None:
|
||||
## LOGGING
|
||||
threading.Thread(
|
||||
target=self.logging_obj.failure_handler,
|
||||
args=(e, traceback_exception),
|
||||
).start() # log response
|
||||
# Handle any exceptions that might occur during streaming
|
||||
asyncio.create_task(
|
||||
self.logging_obj.async_failure_handler(e, traceback_exception) # type: ignore
|
||||
)
|
||||
raise e
|
||||
## Map to OpenAI Exception
|
||||
raise exception_type(
|
||||
model=self.model,
|
||||
custom_llm_provider=self.custom_llm_provider,
|
||||
original_exception=e,
|
||||
completion_kwargs={},
|
||||
extra_kwargs={},
|
||||
)
|
||||
|
||||
|
||||
class TextCompletionStreamWrapper:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "litellm"
|
||||
version = "1.40.23"
|
||||
version = "1.40.24"
|
||||
description = "Library to easily interface with LLM API providers"
|
||||
authors = ["BerriAI"]
|
||||
license = "MIT"
|
||||
|
@ -90,7 +90,7 @@ requires = ["poetry-core", "wheel"]
|
|||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.commitizen]
|
||||
version = "1.40.23"
|
||||
version = "1.40.24"
|
||||
version_files = [
|
||||
"pyproject.toml:^version"
|
||||
]
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1 +0,0 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{61994:function(e,s,l){Promise.resolve().then(l.bind(l,667))},667:function(e,s,l){"use strict";l.r(s),l.d(s,{default:function(){return _}});var t=l(3827),a=l(64090),r=l(47907),n=l(16450),i=l(18190),o=l(13810),u=l(10384),c=l(46453),d=l(71801),m=l(52273),h=l(42440),x=l(30953),j=l(777),p=l(37963),f=l(60620),g=l(1861);function _(){let[e]=f.Z.useForm(),s=(0,r.useSearchParams)();s.get("token");let l=s.get("id"),[_,Z]=(0,a.useState)(null),[w,b]=(0,a.useState)(""),[N,S]=(0,a.useState)(""),[k,y]=(0,a.useState)(null),[v,E]=(0,a.useState)(""),[F,I]=(0,a.useState)("");return(0,a.useEffect)(()=>{l&&(0,j.W_)(l).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let l=e.token,t=(0,p.o)(l);I(l),console.log("decoded:",t),Z(t.key),console.log("decoded user email:",t.user_email),S(t.user_email),y(t.user_id)})},[l]),(0,t.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,t.jsxs)(o.Z,{children:[(0,t.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,t.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,t.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,t.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,t.jsxs)(c.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,t.jsx)(u.Z,{children:"SSO is under the Enterprise Tirer."}),(0,t.jsx)(u.Z,{children:(0,t.jsx)(n.Z,{variant:"primary",className:"mb-2",children:(0,t.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,t.jsxs)(f.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",F,"formValues:",e),_&&F&&(e.user_email=N,k&&l&&(0,j.m_)(_,l,k,e.password).then(e=>{var s;let l="/ui/";console.log("redirecting to:",l+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+F),window.location.href=l}))},children:[(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(f.Z.Item,{label:"Email Address",name:"user_email",children:(0,t.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,t.jsx)(f.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,t.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,t.jsx)("div",{className:"mt-10",children:(0,t.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,294,684,777,971,69,744],function(){return e(e.s=61994)}),_N_E=e.O()}]);
|
|
@ -0,0 +1 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{61994:function(e,s,t){Promise.resolve().then(t.bind(t,667))},667:function(e,s,t){"use strict";t.r(s),t.d(s,{default:function(){return _}});var l=t(3827),n=t(64090),a=t(47907),r=t(16450),i=t(18190),o=t(13810),c=t(10384),u=t(46453),d=t(71801),m=t(52273),h=t(42440),x=t(30953),p=t(777),f=t(37963),j=t(60620),g=t(1861);function _(){let[e]=j.Z.useForm(),s=(0,a.useSearchParams)();!function(e){console.log("COOKIES",document.cookie);let s=document.cookie.split("; ").find(s=>s.startsWith(e+"="));s&&s.split("=")[1]}("token");let t=s.get("id"),[_,Z]=(0,n.useState)(null),[k,w]=(0,n.useState)(""),[S,b]=(0,n.useState)(""),[N,y]=(0,n.useState)(null),[v,E]=(0,n.useState)(""),[I,O]=(0,n.useState)("");return(0,n.useEffect)(()=>{t&&(0,p.W_)(t).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let t=e.token,l=(0,f.o)(t);O(t),console.log("decoded:",l),Z(l.key),console.log("decoded user email:",l.user_email),b(l.user_email),y(l.user_id)})},[t]),(0,l.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,l.jsxs)(o.Z,{children:[(0,l.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,l.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,l.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,l.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,l.jsxs)(u.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,l.jsx)(c.Z,{children:"SSO is under the Enterprise Tirer."}),(0,l.jsx)(c.Z,{children:(0,l.jsx)(r.Z,{variant:"primary",className:"mb-2",children:(0,l.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,l.jsxs)(j.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",I,"formValues:",e),_&&I&&(e.user_email=S,N&&t&&(0,p.m_)(_,t,N,e.password).then(e=>{var s;let t="/ui/";console.log("redirecting to:",t+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+I),window.location.href=t}))},children:[(0,l.jsxs)(l.Fragment,{children:[(0,l.jsx)(j.Z.Item,{label:"Email Address",name:"user_email",children:(0,l.jsx)(m.Z,{type:"email",disabled:!0,value:S,defaultValue:S,className:"max-w-md"})}),(0,l.jsx)(j.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,l.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,l.jsx)("div",{className:"mt-10",children:(0,l.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,294,684,777,971,69,744],function(){return e(e.s=61994)}),_N_E=e.O()}]);
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-f76791513e294b30.js\",\"931\",\"static/chunks/app/page-5b9334558218205d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"dDWf4yi4zCe685SxgCnWX\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-f76791513e294b30.js\",\"931\",\"static/chunks/app/page-42b04008af7da690.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"DahySukItzAH9ZoOiMmQB\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[48951,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-5b9334558218205d.js"],""]
|
||||
3:I[48951,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-42b04008af7da690.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["dDWf4yi4zCe685SxgCnWX",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["DahySukItzAH9ZoOiMmQB",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -2,6 +2,6 @@
|
|||
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-f76791513e294b30.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["dDWf4yi4zCe685SxgCnWX",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["DahySukItzAH9ZoOiMmQB",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-da04a591bae84617.js"],""]
|
||||
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-fd30ae439831db99.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["dDWf4yi4zCe685SxgCnWX",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["DahySukItzAH9ZoOiMmQB",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -20,10 +20,19 @@ import {
|
|||
} from "@/components/networking";
|
||||
import { jwtDecode } from "jwt-decode";
|
||||
import { Form, Button as Button2, message } from "antd";
|
||||
|
||||
function getCookie(name: string) {
|
||||
console.log("COOKIES", document.cookie)
|
||||
const cookieValue = document.cookie
|
||||
.split('; ')
|
||||
.find(row => row.startsWith(name + '='));
|
||||
return cookieValue ? cookieValue.split('=')[1] : null;
|
||||
}
|
||||
|
||||
export default function Onboarding() {
|
||||
const [form] = Form.useForm();
|
||||
const searchParams = useSearchParams();
|
||||
const token = searchParams.get("token");
|
||||
const token = getCookie('token');
|
||||
const inviteID = searchParams.get("id");
|
||||
const [accessToken, setAccessToken] = useState<string | null>(null);
|
||||
const [defaultUserEmail, setDefaultUserEmail] = useState<string>("");
|
||||
|
|
|
@ -19,6 +19,15 @@ import CacheDashboard from "@/components/cache_dashboard";
|
|||
import { jwtDecode } from "jwt-decode";
|
||||
import { Typography } from "antd";
|
||||
|
||||
function getCookie(name: string) {
|
||||
console.log("COOKIES", document.cookie)
|
||||
const cookieValue = document.cookie
|
||||
.split('; ')
|
||||
.find(row => row.startsWith(name + '='));
|
||||
return cookieValue ? cookieValue.split('=')[1] : null;
|
||||
}
|
||||
|
||||
|
||||
function formatUserRole(userRole: string) {
|
||||
if (!userRole) {
|
||||
return "Undefined Role";
|
||||
|
@ -68,7 +77,7 @@ const CreateKeyPage = () => {
|
|||
const searchParams = useSearchParams();
|
||||
const [modelData, setModelData] = useState<any>({ data: [] });
|
||||
const userID = searchParams.get("userID");
|
||||
const token = searchParams.get("token");
|
||||
const token = getCookie('token');
|
||||
|
||||
const [page, setPage] = useState("api-keys");
|
||||
const [accessToken, setAccessToken] = useState<string | null>(null);
|
||||
|
|
|
@ -24,6 +24,14 @@ type UserSpendData = {
|
|||
max_budget?: number | null;
|
||||
};
|
||||
|
||||
function getCookie(name: string) {
|
||||
console.log("COOKIES", document.cookie)
|
||||
const cookieValue = document.cookie
|
||||
.split('; ')
|
||||
.find(row => row.startsWith(name + '='));
|
||||
return cookieValue ? cookieValue.split('=')[1] : null;
|
||||
}
|
||||
|
||||
interface UserDashboardProps {
|
||||
userID: string | null;
|
||||
userRole: string | null;
|
||||
|
@ -66,7 +74,8 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
|
|||
const viewSpend = searchParams.get("viewSpend");
|
||||
const router = useRouter();
|
||||
|
||||
const token = searchParams.get("token");
|
||||
const token = getCookie('token');
|
||||
|
||||
const [accessToken, setAccessToken] = useState<string | null>(null);
|
||||
const [teamSpend, setTeamSpend] = useState<number | null>(null);
|
||||
const [userModels, setUserModels] = useState<string[]>([]);
|
||||
|
|