From fa554ae2181cd6a9f4427d53f49eb1c524b7aa2c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 22 Jun 2024 18:46:30 -0700
Subject: [PATCH 001/150] fix - clean up in memory cache

---
 litellm/caching.py | 68 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 51 insertions(+), 17 deletions(-)

diff --git a/litellm/caching.py b/litellm/caching.py
index 6b58cf527..dde41ad29 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -7,14 +7,20 @@
 #
 #  Thank you users! We ❤️ you! - Krrish & Ishaan
 
-import litellm
-import time, logging, asyncio
-import json, traceback, ast, hashlib
-from typing import Optional, Literal, List, Union, Any, BinaryIO
+import ast
+import asyncio
+import hashlib
+import json
+import logging
+import time
+import traceback
+from typing import Any, BinaryIO, List, Literal, Optional, Union
+
 from openai._models import BaseModel as OpenAIObject
+
+import litellm
 from litellm._logging import verbose_logger
 from litellm.types.services import ServiceLoggerPayload, ServiceTypes
-import traceback
 
 
 def print_verbose(print_statement):
@@ -57,10 +63,12 @@ class BaseCache:
 
 
 class InMemoryCache(BaseCache):
-    def __init__(self):
+    def __init__(self, default_ttl: Optional[float] = 60.0):
         # if users don't provider one, use the default litellm cache
-        self.cache_dict = {}
-        self.ttl_dict = {}
+        self.cache_dict: dict = {}
+        self.ttl_dict: dict = {}
+        self.default_ttl = default_ttl
+        self.last_cleaned = 0  # since this is in memory we need to periodically clean it up to not overuse the machines RAM
 
     def set_cache(self, key, value, **kwargs):
         print_verbose("InMemoryCache: set_cache")
@@ -70,6 +78,8 @@ class InMemoryCache(BaseCache):
 
     async def async_set_cache(self, key, value, **kwargs):
         self.set_cache(key=key, value=value, **kwargs)
+        if time.time() > self.last_cleaned:
+            asyncio.create_task(self.clean_up_in_memory_cache())
 
     async def async_set_cache_pipeline(self, cache_list, ttl=None):
         for cache_key, cache_value in cache_list:
@@ -78,6 +88,9 @@ class InMemoryCache(BaseCache):
             else:
                 self.set_cache(key=cache_key, value=cache_value)
 
+        if time.time() > self.last_cleaned:
+            asyncio.create_task(self.clean_up_in_memory_cache())
+
     def get_cache(self, key, **kwargs):
         if key in self.cache_dict:
             if key in self.ttl_dict:
@@ -121,8 +134,26 @@ class InMemoryCache(BaseCache):
         init_value = await self.async_get_cache(key=key) or 0
         value = init_value + value
         await self.async_set_cache(key, value, **kwargs)
+
+        if time.time() > self.last_cleaned:
+            asyncio.create_task(self.clean_up_in_memory_cache())
+
         return value
 
+    async def clean_up_in_memory_cache(self):
+        """
+        Runs periodically to clean up the in-memory cache
+
+        - loop through all keys in cache, check if they are expired
+        - if yes, delete them
+        """
+        for key in list(self.cache_dict.keys()):
+            if key in self.ttl_dict:
+                if time.time() > self.ttl_dict[key]:
+                    self.cache_dict.pop(key, None)
+                    self.ttl_dict.pop(key, None)
+        self.last_cleaned = time.time()
+
     def flush_cache(self):
         self.cache_dict.clear()
         self.ttl_dict.clear()
@@ -147,10 +178,12 @@ class RedisCache(BaseCache):
         namespace: Optional[str] = None,
         **kwargs,
     ):
-        from ._redis import get_redis_client, get_redis_connection_pool
-        from litellm._service_logger import ServiceLogging
         import redis
 
+        from litellm._service_logger import ServiceLogging
+
+        from ._redis import get_redis_client, get_redis_connection_pool
+
         redis_kwargs = {}
         if host is not None:
             redis_kwargs["host"] = host
@@ -886,11 +919,10 @@ class RedisSemanticCache(BaseCache):
 
     def get_cache(self, key, **kwargs):
         print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}")
-        from redisvl.query import VectorQuery
         import numpy as np
+        from redisvl.query import VectorQuery
 
         # query
-
         # get the messages
         messages = kwargs["messages"]
         prompt = "".join(message["content"] for message in messages)
@@ -943,7 +975,8 @@ class RedisSemanticCache(BaseCache):
 
     async def async_set_cache(self, key, value, **kwargs):
         import numpy as np
-        from litellm.proxy.proxy_server import llm_router, llm_model_list
+
+        from litellm.proxy.proxy_server import llm_model_list, llm_router
 
         try:
             await self.index.acreate(overwrite=False)  # don't overwrite existing index
@@ -998,12 +1031,12 @@ class RedisSemanticCache(BaseCache):
 
     async def async_get_cache(self, key, **kwargs):
         print_verbose(f"async redis semantic-cache get_cache, kwargs: {kwargs}")
-        from redisvl.query import VectorQuery
         import numpy as np
-        from litellm.proxy.proxy_server import llm_router, llm_model_list
+        from redisvl.query import VectorQuery
+
+        from litellm.proxy.proxy_server import llm_model_list, llm_router
 
         # query
-
         # get the messages
         messages = kwargs["messages"]
         prompt = "".join(message["content"] for message in messages)
@@ -1161,7 +1194,8 @@ class S3Cache(BaseCache):
         self.set_cache(key=key, value=value, **kwargs)
 
     def get_cache(self, key, **kwargs):
-        import boto3, botocore
+        import boto3
+        import botocore
 
         try:
             key = self.key_prefix + key

From 0418db30443e93057dc044f9e7d86a40258db0fc Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 22 Jun 2024 19:21:37 -0700
Subject: [PATCH 002/150] fix caching clear in memory cache mem util

---
 litellm/caching.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/litellm/caching.py b/litellm/caching.py
index dde41ad29..6ac439e0f 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -64,10 +64,14 @@ class BaseCache:
 
 class InMemoryCache(BaseCache):
     def __init__(self, default_ttl: Optional[float] = 60.0):
+        """
+        default_ttl [float]: If default_ttl is 6 seconds, every 6 seconds the cache will be set to {}
+        this is done to prevent overuse of System RAM
+        """
         # if users don't provider one, use the default litellm cache
         self.cache_dict: dict = {}
         self.ttl_dict: dict = {}
-        self.default_ttl = default_ttl
+        self.default_ttl = default_ttl or 60.0
         self.last_cleaned = 0  # since this is in memory we need to periodically clean it up to not overuse the machines RAM
 
     def set_cache(self, key, value, **kwargs):
@@ -78,7 +82,7 @@ class InMemoryCache(BaseCache):
 
     async def async_set_cache(self, key, value, **kwargs):
         self.set_cache(key=key, value=value, **kwargs)
-        if time.time() > self.last_cleaned:
+        if time.time() - self.last_cleaned > self.default_ttl:
             asyncio.create_task(self.clean_up_in_memory_cache())
 
     async def async_set_cache_pipeline(self, cache_list, ttl=None):
@@ -88,7 +92,7 @@ class InMemoryCache(BaseCache):
             else:
                 self.set_cache(key=cache_key, value=cache_value)
 
-        if time.time() > self.last_cleaned:
+        if time.time() - self.last_cleaned > self.default_ttl:
             asyncio.create_task(self.clean_up_in_memory_cache())
 
     def get_cache(self, key, **kwargs):
@@ -135,7 +139,7 @@ class InMemoryCache(BaseCache):
         value = init_value + value
         await self.async_set_cache(key, value, **kwargs)
 
-        if time.time() > self.last_cleaned:
+        if time.time() - self.last_cleaned > self.default_ttl:
             asyncio.create_task(self.clean_up_in_memory_cache())
 
         return value
@@ -147,11 +151,8 @@ class InMemoryCache(BaseCache):
         - loop through all keys in cache, check if they are expired
         - if yes, delete them
         """
-        for key in list(self.cache_dict.keys()):
-            if key in self.ttl_dict:
-                if time.time() > self.ttl_dict[key]:
-                    self.cache_dict.pop(key, None)
-                    self.ttl_dict.pop(key, None)
+        self.cache_dict = {}
+        self.ttl_dict = {}
         self.last_cleaned = time.time()
 
     def flush_cache(self):

From 8a66e074ce7c92b3cced842095d0d0afc0270418 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 22 Jun 2024 19:51:43 -0700
Subject: [PATCH 003/150] fix in mem cache tests

---
 litellm/caching.py | 4 ++--
 litellm/router.py  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/litellm/caching.py b/litellm/caching.py
index 4fe9ace07..e77d71dd8 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -64,7 +64,7 @@ class BaseCache:
 
 
 class InMemoryCache(BaseCache):
-    def __init__(self, default_ttl: Optional[float] = 60.0):
+    def __init__(self, default_ttl: Optional[float] = 120.0):
         """
         default_ttl [float]: If default_ttl is 6 seconds, every 6 seconds the cache will be set to {}
         this is done to prevent overuse of System RAM
@@ -72,7 +72,7 @@ class InMemoryCache(BaseCache):
         # if users don't provider one, use the default litellm cache
         self.cache_dict: dict = {}
         self.ttl_dict: dict = {}
-        self.default_ttl = default_ttl or 60.0
+        self.default_ttl = default_ttl or 120.0
         self.last_cleaned = 0  # since this is in memory we need to periodically clean it up to not overuse the machines RAM
 
     def set_cache(self, key, value, **kwargs):
diff --git a/litellm/router.py b/litellm/router.py
index df783eab8..8c05a7e8b 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -282,7 +282,7 @@ class Router:
                 litellm.cache = litellm.Cache(type=cache_type, **cache_config)  # type: ignore
             self.cache_responses = cache_responses
         self.cache = DualCache(
-            redis_cache=redis_cache, in_memory_cache=InMemoryCache()
+            redis_cache=redis_cache, in_memory_cache=InMemoryCache(default_ttl=86400)
         )  # use a dual cache (Redis+In-Memory) for tracking cooldowns, usage, etc.
 
         ### SCHEDULER ###

From 974d92ff45af7a583e1ab17178ee688ab9abd27c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 17:03:23 -0700
Subject: [PATCH 004/150] fix use caching lib

---
 litellm/caching.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/litellm/caching.py b/litellm/caching.py
index e77d71dd8..5aa41ce35 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -17,6 +17,7 @@ import traceback
 from datetime import timedelta
 from typing import Any, BinaryIO, List, Literal, Optional, Union
 
+from cachetools import Cache as CachetoolsCache
 from openai._models import BaseModel as OpenAIObject
 
 import litellm
@@ -70,7 +71,9 @@ class InMemoryCache(BaseCache):
         this is done to prevent overuse of System RAM
         """
         # if users don't provider one, use the default litellm cache
-        self.cache_dict: dict = {}
+        self.cache_dict: CachetoolsCache = CachetoolsCache(
+            maxsize=1000,
+        )
         self.ttl_dict: dict = {}
         self.default_ttl = default_ttl or 120.0
         self.last_cleaned = 0  # since this is in memory we need to periodically clean it up to not overuse the machines RAM
@@ -83,8 +86,6 @@ class InMemoryCache(BaseCache):
 
     async def async_set_cache(self, key, value, **kwargs):
         self.set_cache(key=key, value=value, **kwargs)
-        if time.time() - self.last_cleaned > self.default_ttl:
-            asyncio.create_task(self.clean_up_in_memory_cache())
 
     async def async_set_cache_pipeline(self, cache_list, ttl=None):
         for cache_key, cache_value in cache_list:
@@ -93,10 +94,6 @@ class InMemoryCache(BaseCache):
             else:
                 self.set_cache(key=cache_key, value=cache_value)
 
-
-        if time.time() - self.last_cleaned > self.default_ttl:
-            asyncio.create_task(self.clean_up_in_memory_cache())
-
     async def async_set_cache_sadd(self, key, value: List, ttl: Optional[float]):
         """
         Add value to set
@@ -108,7 +105,6 @@ class InMemoryCache(BaseCache):
         self.set_cache(key, init_value, ttl=ttl)
         return value
 
-
     def get_cache(self, key, **kwargs):
         if key in self.cache_dict:
             if key in self.ttl_dict:

From e5ab0d4ecd83e51807f3abcd4195382a16e7668d Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 17:08:30 -0700
Subject: [PATCH 005/150] fix InMemoryCache

---
 litellm/caching.py | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/litellm/caching.py b/litellm/caching.py
index 5aa41ce35..705b5fc13 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -76,7 +76,6 @@ class InMemoryCache(BaseCache):
         )
         self.ttl_dict: dict = {}
         self.default_ttl = default_ttl or 120.0
-        self.last_cleaned = 0  # since this is in memory we need to periodically clean it up to not overuse the machines RAM
 
     def set_cache(self, key, value, **kwargs):
         print_verbose("InMemoryCache: set_cache")
@@ -149,22 +148,8 @@ class InMemoryCache(BaseCache):
         value = init_value + value
         await self.async_set_cache(key, value, **kwargs)
 
-        if time.time() - self.last_cleaned > self.default_ttl:
-            asyncio.create_task(self.clean_up_in_memory_cache())
-
         return value
 
-    async def clean_up_in_memory_cache(self):
-        """
-        Runs periodically to clean up the in-memory cache
-
-        - loop through all keys in cache, check if they are expired
-        - if yes, delete them
-        """
-        self.cache_dict = {}
-        self.ttl_dict = {}
-        self.last_cleaned = time.time()
-
     def flush_cache(self):
         self.cache_dict.clear()
         self.ttl_dict.clear()

From 5bbbb5a7ee664d730fe02cfa0bb103858b1ba8cd Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 17:10:34 -0700
Subject: [PATCH 006/150] fix router.py

---
 litellm/router.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/router.py b/litellm/router.py
index 8c05a7e8b..df783eab8 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -282,7 +282,7 @@ class Router:
                 litellm.cache = litellm.Cache(type=cache_type, **cache_config)  # type: ignore
             self.cache_responses = cache_responses
         self.cache = DualCache(
-            redis_cache=redis_cache, in_memory_cache=InMemoryCache(default_ttl=86400)
+            redis_cache=redis_cache, in_memory_cache=InMemoryCache()
         )  # use a dual cache (Redis+In-Memory) for tracking cooldowns, usage, etc.
 
         ### SCHEDULER ###

From 2e3119e75fbf996fb499da5bac3b55a661625400 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 17:12:11 -0700
Subject: [PATCH 007/150] fix testing env

---
 .circleci/config.yml | 1 +
 requirements.txt     | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index fd1b48a9c..f939fed00 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -39,6 +39,7 @@ jobs:
             pip install "boto3==1.34.34"
             pip install "aioboto3==12.3.0"
             pip install langchain
+            pip install "cachetools==5.3.1"
             pip install lunary==0.2.5
             pip install "langfuse==2.27.1"
             pip install "logfire==0.29.0"
diff --git a/requirements.txt b/requirements.txt
index fbf2bfc1d..4549ea010 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -42,6 +42,7 @@ jinja2==3.1.4 # for prompt templates
 certifi==2023.7.22 # [TODO] clean up 
 aiohttp==3.9.0 # for network calls
 aioboto3==12.3.0 # for async sagemaker calls
+cachetools==5.3.1 # for in memory caching
 tenacity==8.2.3  # for retrying requests, when litellm.num_retries set
 pydantic==2.7.1 # proxy + openai req.
 ijson==3.2.3 # for google ai studio streaming

From 4053c7aeb33193cc75436715a77b92382f1c6fa1 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 17:15:53 -0700
Subject: [PATCH 008/150] use lru cache

---
 litellm/caching.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/litellm/caching.py b/litellm/caching.py
index 705b5fc13..ceb8e70b1 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -17,7 +17,7 @@ import traceback
 from datetime import timedelta
 from typing import Any, BinaryIO, List, Literal, Optional, Union
 
-from cachetools import Cache as CachetoolsCache
+from cachetools import LRUCache
 from openai._models import BaseModel as OpenAIObject
 
 import litellm
@@ -71,10 +71,9 @@ class InMemoryCache(BaseCache):
         this is done to prevent overuse of System RAM
         """
         # if users don't provider one, use the default litellm cache
-        self.cache_dict: CachetoolsCache = CachetoolsCache(
-            maxsize=1000,
-        )
-        self.ttl_dict: dict = {}
+        max_size_in_memory = 1000
+        self.cache_dict: LRUCache = LRUCache(maxsize=max_size_in_memory)
+        self.ttl_dict: LRUCache = LRUCache(maxsize=max_size_in_memory)
         self.default_ttl = default_ttl or 120.0
 
     def set_cache(self, key, value, **kwargs):

From b13a93d9bc61f0cd43d2c09fa694ef83d1c73bee Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 17:24:59 -0700
Subject: [PATCH 009/150] cleanup InMemoryCache

---
 litellm/caching.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/litellm/caching.py b/litellm/caching.py
index ceb8e70b1..c46dd3af8 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -65,16 +65,13 @@ class BaseCache:
 
 
 class InMemoryCache(BaseCache):
-    def __init__(self, default_ttl: Optional[float] = 120.0):
+    def __init__(self, max_size_in_memory: Optional[int] = 200):
         """
-        default_ttl [float]: If default_ttl is 6 seconds, every 6 seconds the cache will be set to {}
-        this is done to prevent overuse of System RAM
+        max_size_in_memory [int]: Maximum number of items in cache. done to prevent memory leaks. Use 200 items as a default
         """
-        # if users don't provider one, use the default litellm cache
-        max_size_in_memory = 1000
-        self.cache_dict: LRUCache = LRUCache(maxsize=max_size_in_memory)
-        self.ttl_dict: LRUCache = LRUCache(maxsize=max_size_in_memory)
-        self.default_ttl = default_ttl or 120.0
+        self.max_size_in_memory = max_size_in_memory or 200
+        self.cache_dict: LRUCache = LRUCache(maxsize=self.max_size_in_memory)
+        self.ttl_dict: LRUCache = LRUCache(maxsize=self.max_size_in_memory)
 
     def set_cache(self, key, value, **kwargs):
         print_verbose("InMemoryCache: set_cache")

From effc7579ac1d4b8bf5c7e437d55455d9b25b7097 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 17:27:14 -0700
Subject: [PATCH 010/150] fix install on python 3.8

---
 .circleci/config.yml | 2 +-
 litellm/caching.py   | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index f939fed00..fc0bb5b98 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -89,7 +89,7 @@ jobs:
           name: Linting Testing
           command: |
             cd litellm
-            python -m pip install types-requests types-setuptools types-redis types-PyYAML
+            python -m pip install types-requests types-setuptools types-redis types-PyYAML types-cachetools
             if ! python -m mypy . --ignore-missing-imports; then
               echo "mypy detected errors"
               exit 1
diff --git a/litellm/caching.py b/litellm/caching.py
index c46dd3af8..78b4bd270 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -17,7 +17,6 @@ import traceback
 from datetime import timedelta
 from typing import Any, BinaryIO, List, Literal, Optional, Union
 
-from cachetools import LRUCache
 from openai._models import BaseModel as OpenAIObject
 
 import litellm
@@ -69,6 +68,8 @@ class InMemoryCache(BaseCache):
         """
         max_size_in_memory [int]: Maximum number of items in cache. done to prevent memory leaks. Use 200 items as a default
         """
+        from cachetools import LRUCache
+
         self.max_size_in_memory = max_size_in_memory or 200
         self.cache_dict: LRUCache = LRUCache(maxsize=self.max_size_in_memory)
         self.ttl_dict: LRUCache = LRUCache(maxsize=self.max_size_in_memory)

From d0b1d3e9cc54e4e2d229ae8288166059e3c0d779 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 17:30:48 -0700
Subject: [PATCH 011/150] fix python3.8 with cachetools

---
 .circleci/config.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index fc0bb5b98..548eab3af 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -124,6 +124,7 @@ jobs:
             pip install pytest
             pip install tiktoken
             pip install aiohttp
+            pip install "cachetools==5.3.1"
             pip install click
             pip install jinja2
             pip install tokenizers
@@ -176,6 +177,7 @@ jobs:
             pip install "google-cloud-aiplatform==1.43.0"
             pip install pyarrow
             pip install "boto3==1.34.34"
+            pip install "cachetools==5.3.1"
             pip install "aioboto3==12.3.0"
             pip install langchain
             pip install "langfuse>=2.0.0"

From 6091c7798eaed3de85b4c923cc49b60399397f17 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 17:31:59 -0700
Subject: [PATCH 012/150] use cache tools as dep

---
 poetry.lock    | 12 ++++++------
 pyproject.toml |  1 +
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 290d19f7a..88927576c 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
@@ -343,13 +343,13 @@ uvloop = ["uvloop (>=0.15.2)"]
 
 [[package]]
 name = "cachetools"
-version = "5.3.3"
+version = "5.3.1"
 description = "Extensible memoizing collections and decorators"
-optional = true
+optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "cachetools-5.3.3-py3-none-any.whl", hash = "sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945"},
-    {file = "cachetools-5.3.3.tar.gz", hash = "sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105"},
+    {file = "cachetools-5.3.1-py3-none-any.whl", hash = "sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"},
+    {file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"},
 ]
 
 [[package]]
@@ -3300,4 +3300,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0, !=3.9.7"
-content-hash = "f400d2f686954c2b12b0ee88546f31d52ebc8e323a3ec850dc46d74748d38cdf"
+content-hash = "022481b965a1a6524cc25d52eff59592779aafdf03dc6159c834b9519079f549"
diff --git a/pyproject.toml b/pyproject.toml
index 3254ae2e2..af8e050fa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,6 +27,7 @@ jinja2 = "^3.1.2"
 aiohttp = "*"
 requests = "^2.31.0"
 pydantic = "^2.0.0"
+cachetools = ">=5.3.1"
 ijson = "*"
 
 uvicorn = {version = "^0.22.0", optional = true}

From fa57d2e823fd94a6be90e71c385c53701b2e87a2 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 20:28:03 -0700
Subject: [PATCH 013/150] feat use custom eviction policy

---
 litellm/caching.py | 42 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/litellm/caching.py b/litellm/caching.py
index 78b4bd270..b6921bac8 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -64,21 +64,53 @@ class BaseCache:
 
 
 class InMemoryCache(BaseCache):
-    def __init__(self, max_size_in_memory: Optional[int] = 200):
+    def __init__(
+        self,
+        max_size_in_memory: Optional[int] = 200,
+        default_ttl: Optional[
+            int
+        ] = 300,  # default ttl is 5 minutes. At maximum litellm rate limiting logic requires objects to be in memory for 1 minute
+    ):
         """
         max_size_in_memory [int]: Maximum number of items in cache. done to prevent memory leaks. Use 200 items as a default
         """
-        from cachetools import LRUCache
+        self.max_size_in_memory = (
+            max_size_in_memory or 200
+        )  # set an upper bound of 200 items in-memory
+        self.default_ttl = default_ttl or 300
 
-        self.max_size_in_memory = max_size_in_memory or 200
-        self.cache_dict: LRUCache = LRUCache(maxsize=self.max_size_in_memory)
-        self.ttl_dict: LRUCache = LRUCache(maxsize=self.max_size_in_memory)
+        # in-memory cache
+        self.cache_dict: dict = {}
+        self.ttl_dict: dict = {}
+
+    def evict_cache(self):
+        """
+        Eviction policy:
+        - check if any items in ttl_dict are expired -> remove them from ttl_dict and cache_dict
+
+
+        This guarantees the following:
+        - 1. When item ttl not set: At minimumm each item will remain in memory for 5 minutes
+        - 2. When ttl is set: the item will remain in memory for at least that amount of time
+        - 3. the size of in-memory cache is bounded
+
+        """
+        for key in list(self.ttl_dict.keys()):
+            if time.time() > self.ttl_dict[key]:
+                self.cache_dict.pop(key, None)
+                self.ttl_dict.pop(key, None)
 
     def set_cache(self, key, value, **kwargs):
         print_verbose("InMemoryCache: set_cache")
+        if len(self.cache_dict) >= self.max_size_in_memory:
+            # only evict when cache is full
+            self.evict_cache()
+
         self.cache_dict[key] = value
         if "ttl" in kwargs:
             self.ttl_dict[key] = time.time() + kwargs["ttl"]
+        else:
+            self.ttl_dict[key] = time.time() + self.default_ttl
 
     async def async_set_cache(self, key, value, **kwargs):
         self.set_cache(key=key, value=value, **kwargs)

From 60bd5cb6b1b613270eceab3e8cbc0521de3d5b9a Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 20:28:58 -0700
Subject: [PATCH 014/150] fix config.yaml

---
 .circleci/config.yml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 548eab3af..fd1b48a9c 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -39,7 +39,6 @@ jobs:
             pip install "boto3==1.34.34"
             pip install "aioboto3==12.3.0"
             pip install langchain
-            pip install "cachetools==5.3.1"
             pip install lunary==0.2.5
             pip install "langfuse==2.27.1"
             pip install "logfire==0.29.0"
@@ -89,7 +88,7 @@ jobs:
           name: Linting Testing
           command: |
             cd litellm
-            python -m pip install types-requests types-setuptools types-redis types-PyYAML types-cachetools
+            python -m pip install types-requests types-setuptools types-redis types-PyYAML
             if ! python -m mypy . --ignore-missing-imports; then
               echo "mypy detected errors"
               exit 1
@@ -124,7 +123,6 @@ jobs:
             pip install pytest
             pip install tiktoken
             pip install aiohttp
-            pip install "cachetools==5.3.1"
             pip install click
             pip install jinja2
             pip install tokenizers
@@ -177,7 +175,6 @@ jobs:
             pip install "google-cloud-aiplatform==1.43.0"
             pip install pyarrow
             pip install "boto3==1.34.34"
-            pip install "cachetools==5.3.1"
             pip install "aioboto3==12.3.0"
             pip install langchain
             pip install "langfuse>=2.0.0"

From b900200c58bb093b546d6b6ccff1c227c36a7ef9 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 20:29:33 -0700
Subject: [PATCH 015/150] fix deps

---
 pyproject.toml   | 1 -
 requirements.txt | 1 -
 2 files changed, 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index af8e050fa..3254ae2e2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,7 +27,6 @@ jinja2 = "^3.1.2"
 aiohttp = "*"
 requests = "^2.31.0"
 pydantic = "^2.0.0"
-cachetools = ">=5.3.1"
 ijson = "*"
 
 uvicorn = {version = "^0.22.0", optional = true}
diff --git a/requirements.txt b/requirements.txt
index 4549ea010..fbf2bfc1d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -42,7 +42,6 @@ jinja2==3.1.4 # for prompt templates
 certifi==2023.7.22 # [TODO] clean up 
 aiohttp==3.9.0 # for network calls
 aioboto3==12.3.0 # for async sagemaker calls
-cachetools==5.3.1 # for in memory caching
 tenacity==8.2.3  # for retrying requests, when litellm.num_retries set
 pydantic==2.7.1 # proxy + openai req.
 ijson==3.2.3 # for google ai studio streaming

From 05fe43f495582c2fdf70bdaf9ce82ad2df03b311 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 21:21:38 -0700
Subject: [PATCH 016/150] fix default ttl for InMemoryCache

---
 litellm/caching.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/litellm/caching.py b/litellm/caching.py
index b6921bac8..68f5d98ef 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -69,7 +69,7 @@ class InMemoryCache(BaseCache):
         max_size_in_memory: Optional[int] = 200,
         default_ttl: Optional[
             int
-        ] = 300,  # default ttl is 5 minutes. At maximum litellm rate limiting logic requires objects to be in memory for 1 minute
+        ] = 600,  # default ttl is 10 minutes. At maximum litellm rate limiting logic requires objects to be in memory for 1 minute
     ):
         """
         max_size_in_memory [int]: Maximum number of items in cache. done to prevent memory leaks. Use 200 items as a default
@@ -77,7 +77,7 @@ class InMemoryCache(BaseCache):
         self.max_size_in_memory = (
             max_size_in_memory or 200
         )  # set an upper bound of 200 items in-memory
-        self.default_ttl = default_ttl or 300
+        self.default_ttl = default_ttl or 600
 
         # in-memory cache
         self.cache_dict: dict = {}

From e89935942747435c28124e4b94b80a4236d85b7e Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 08:14:09 -0700
Subject: [PATCH 017/150] ci/cd add debugging for cache eviction

---
 litellm/caching.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/litellm/caching.py b/litellm/caching.py
index 68f5d98ef..19c1431a2 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -97,6 +97,14 @@ class InMemoryCache(BaseCache):
         """
         for key in list(self.ttl_dict.keys()):
             if time.time() > self.ttl_dict[key]:
+                print(  # noqa
+                    "Cache Evicting item key=",
+                    key,
+                    "ttl=",
+                    self.ttl_dict[key],
+                    "size of cache=",
+                    len(self.cache_dict),
+                )
                 self.cache_dict.pop(key, None)
                 self.ttl_dict.pop(key, None)
 

From f479cd549f6895b4da975a4f548b33d2c5370cc1 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 22 Jun 2024 19:10:15 -0700
Subject: [PATCH 018/150] fix(router.py): check if azure returns
 'content_filter' response + fallback available -> fallback

Exception maps azure content filter response exceptions
---
 litellm/proxy/_experimental/out/404.html      |  1 -
 .../proxy/_experimental/out/model_hub.html    |  1 -
 .../proxy/_experimental/out/onboarding.html   |  1 -
 litellm/proxy/_new_secret_config.yaml         |  2 +-
 litellm/router.py                             | 58 +++++++++++++++++++
 litellm/tests/test_router_fallbacks.py        | 30 ++++++++--
 litellm/types/router.py                       |  3 +-
 7 files changed, 85 insertions(+), 11 deletions(-)
 delete mode 100644 litellm/proxy/_experimental/out/404.html
 delete mode 100644 litellm/proxy/_experimental/out/model_hub.html
 delete mode 100644 litellm/proxy/_experimental/out/onboarding.html

diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html
deleted file mode 100644
index 909f71542..000000000
--- a/litellm/proxy/_experimental/out/404.html
+++ /dev/null
@@ -1 +0,0 @@
-<!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" href="/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2" as="font" crossorigin="" type="font/woff2"/><link rel="stylesheet" href="/ui/_next/static/css/0f6908625573deae.css" crossorigin="" data-precedence="next"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>404: This page could not be found.</title><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body class="__className_12bbc4"><div style="font-family:system-ui,&quot;Segoe UI&quot;,Roboto,Helvetica,Arial,sans-serif,&quot;Apple Color Emoji&quot;,&quot;Segoe UI Emoji&quot;;height:100vh;text-align:center;display:flex;flex-direction:column;align-items:center;justify-content:center"><div><style>body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}</style><h1 class="next-error-h1" style="display:inline-block;margin:0 20px 0 0;padding:0 23px 0 0;font-size:24px;font-weight:500;vertical-align:top;line-height:49px">404</h1><div style="display:inline-block"><h2 style="font-size:14px;font-weight:400;line-height:49px;margin:0">This page could not be found.</h2></div></div></div><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[5613,[],\"\"]\n7:I[31778,[],\"\"]\nd:I[48955,[],\"\"]\n8:{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"}\n9:{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"}\na:{\"display\":\"inline-block\"}\nb:{\"fontSize\":14,\"fontWe"])</script><script>self.__next_f.push([1,"ight\":400,\"lineHeight\":\"49px\",\"margin\":0}\ne:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"DahySukItzAH9ZoOiMmQB\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/_not-found\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L6\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L7\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":\"$8\",\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":\"$9\",\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":\"$a\",\"children\":[\"$\",\"h2\",null,{\"style\":\"$b\",\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$Lc\"],\"globalErrorComponent\":\"$d\",\"missingSlots\":\"$We\"}]]\n"])</script><script>self.__next_f.push([1,"c:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"meta\",\"4\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
\ No newline at end of file
diff --git a/litellm/proxy/_experimental/out/model_hub.html b/litellm/proxy/_experimental/out/model_hub.html
deleted file mode 100644
index ef01db585..000000000
--- a/litellm/proxy/_experimental/out/model_hub.html
+++ /dev/null
@@ -1 +0,0 @@
-<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[87494,[\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"777\",\"static/chunks/777-f76791513e294b30.js\",\"418\",\"static/chunks/app/model_hub/page-ba7819b59161aa64.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"DahySukItzAH9ZoOiMmQB\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/model_hub\",\"initialTree\":[\"\",{\"children\":[\"model_hub\",{\"children\":[\"__PAGE__\",{}]}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"model_hub\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\",\"model_hub\",\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":\"$undefined\",\"notFoundStyles\":\"$undefined\",\"styles\":null}]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
\ No newline at end of file
diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html
deleted file mode 100644
index ff88e53c9..000000000
--- a/litellm/proxy/_experimental/out/onboarding.html
+++ /dev/null
@@ -1 +0,0 @@
-<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[667,[\"665\",\"static/chunks/3014691f-589a5f4865c3822f.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"777\",\"static/chunks/777-f76791513e294b30.js\",\"461\",\"static/chunks/app/onboarding/page-fd30ae439831db99.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"DahySukItzAH9ZoOiMmQB\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/onboarding\",\"initialTree\":[\"\",{\"children\":[\"onboarding\",{\"children\":[\"__PAGE__\",{}]}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"onboarding\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\",\"onboarding\",\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":\"$undefined\",\"notFoundStyles\":\"$undefined\",\"styles\":null}]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
\ No newline at end of file
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 01f09ca02..7d12f1717 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -7,4 +7,4 @@ model_list:
       tpm: 60
 
 litellm_settings: 
-  callbacks: ["dynamic_rate_limiter"]
\ No newline at end of file
+  callbacks: ["dynamic_rate_limiter"]
diff --git a/litellm/router.py b/litellm/router.py
index df783eab8..e9b0cc00a 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -572,6 +572,18 @@ class Router:
                 f"litellm.completion(model={model_name})\033[32m 200 OK\033[0m"
             )
 
+            ## CHECK CONTENT FILTER ERROR ##
+            if isinstance(response, ModelResponse):
+                _should_raise = self._should_raise_content_policy_error(
+                    model=model, response=response, kwargs=kwargs
+                )
+                if _should_raise:
+                    raise litellm.ContentPolicyViolationError(
+                        message="Response output was blocked.",
+                        model=model,
+                        llm_provider="",
+                    )
+
             return response
         except Exception as e:
             verbose_router_logger.info(
@@ -731,6 +743,18 @@ class Router:
                 await self.async_routing_strategy_pre_call_checks(deployment=deployment)
                 response = await _response
 
+            ## CHECK CONTENT FILTER ERROR ##
+            if isinstance(response, ModelResponse):
+                _should_raise = self._should_raise_content_policy_error(
+                    model=model, response=response, kwargs=kwargs
+                )
+                if _should_raise:
+                    raise litellm.ContentPolicyViolationError(
+                        message="Response output was blocked.",
+                        model=model,
+                        llm_provider="",
+                    )
+
             self.success_calls[model_name] += 1
             verbose_router_logger.info(
                 f"litellm.acompletion(model={model_name})\033[32m 200 OK\033[0m"
@@ -2867,6 +2891,40 @@ class Router:
             # Catch all - if any exceptions default to cooling down
             return True
 
+    def _should_raise_content_policy_error(
+        self, model: str, response: ModelResponse, kwargs: dict
+    ) -> bool:
+        """
+        Determines if a content policy error should be raised.
+
+        Only raised if a fallback is available.
+
+        Else, original response is returned.
+        """
+        if response.choices[0].finish_reason != "content_filter":
+            return False
+
+        content_policy_fallbacks = kwargs.get(
+            "content_policy_fallbacks", self.content_policy_fallbacks
+        )
+        ### ONLY RAISE ERROR IF CP FALLBACK AVAILABLE ###
+        if content_policy_fallbacks is not None:
+            fallback_model_group = None
+            for item in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
+                if list(item.keys())[0] == model:
+                    fallback_model_group = item[model]
+                    break
+
+            if fallback_model_group is not None:
+                return True
+
+        verbose_router_logger.info(
+            "Content Policy Error occurred. No available fallbacks. Returning original response. model={}, content_policy_fallbacks={}".format(
+                model, content_policy_fallbacks
+            )
+        )
+        return False
+
     def _set_cooldown_deployments(
         self,
         original_exception: Any,
diff --git a/litellm/tests/test_router_fallbacks.py b/litellm/tests/test_router_fallbacks.py
index 545eb23db..99d2a600c 100644
--- a/litellm/tests/test_router_fallbacks.py
+++ b/litellm/tests/test_router_fallbacks.py
@@ -1,8 +1,12 @@
 #### What this tests ####
 #    This tests calling router with fallback models
 
-import sys, os, time
-import traceback, asyncio
+import asyncio
+import os
+import sys
+import time
+import traceback
+
 import pytest
 
 sys.path.insert(
@@ -762,9 +766,11 @@ def test_ausage_based_routing_fallbacks():
         # The Request should fail azure/gpt-4-fast. Then fallback -> "azure/gpt-4-basic" -> "openai-gpt-4"
         # It should work with "openai-gpt-4"
         import os
+
+        from dotenv import load_dotenv
+
         import litellm
         from litellm import Router
-        from dotenv import load_dotenv
 
         load_dotenv()
 
@@ -1112,9 +1118,19 @@ async def test_client_side_fallbacks_list(sync_mode):
 
 
 @pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.parametrize("content_filter_response_exception", [True, False])
 @pytest.mark.asyncio
-async def test_router_content_policy_fallbacks(sync_mode):
+async def test_router_content_policy_fallbacks(
+    sync_mode, content_filter_response_exception
+):
     os.environ["LITELLM_LOG"] = "DEBUG"
+
+    if content_filter_response_exception:
+        mock_response = Exception("content filtering policy")
+    else:
+        mock_response = litellm.ModelResponse(
+            choices=[litellm.Choices(finish_reason="content_filter")]
+        )
     router = Router(
         model_list=[
             {
@@ -1122,13 +1138,13 @@ async def test_router_content_policy_fallbacks(sync_mode):
                 "litellm_params": {
                     "model": "claude-2",
                     "api_key": "",
-                    "mock_response": Exception("content filtering policy"),
+                    "mock_response": mock_response,
                 },
             },
             {
                 "model_name": "my-fallback-model",
                 "litellm_params": {
-                    "model": "claude-2",
+                    "model": "openai/my-fake-model",
                     "api_key": "",
                     "mock_response": "This works!",
                 },
@@ -1165,3 +1181,5 @@ async def test_router_content_policy_fallbacks(sync_mode):
             model="claude-2",
             messages=[{"role": "user", "content": "Hey, how's it going?"}],
         )
+
+    assert response.model == "my-fake-model"
diff --git a/litellm/types/router.py b/litellm/types/router.py
index 7f043e404..e6864ffe2 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -12,6 +12,7 @@ from pydantic import BaseModel, ConfigDict, Field
 
 from .completion import CompletionRequest
 from .embedding import EmbeddingRequest
+from .utils import ModelResponse
 
 
 class ModelConfig(BaseModel):
@@ -315,7 +316,7 @@ class LiteLLMParamsTypedDict(TypedDict, total=False):
     input_cost_per_second: Optional[float]
     output_cost_per_second: Optional[float]
     ## MOCK RESPONSES ##
-    mock_response: Optional[str]
+    mock_response: Optional[Union[str, ModelResponse, Exception]]
 
 
 class DeploymentTypedDict(TypedDict):

From b23181779f6409caef89b8de64afaac82e329f97 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 22 Jun 2024 20:20:39 -0700
Subject: [PATCH 019/150] fix(vertex_httpx.py): ignore vertex finish reason -
 wait for stream to end

Fixes https://github.com/BerriAI/litellm/issues/4339
---
 litellm/llms/vertex_httpx.py    |  6 ++++--
 litellm/tests/test_streaming.py | 18 +++++++++++++-----
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py
index d3f27e119..38c2d7c47 100644
--- a/litellm/llms/vertex_httpx.py
+++ b/litellm/llms/vertex_httpx.py
@@ -1218,6 +1218,7 @@ class ModelResponseIterator:
     def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
         try:
             processed_chunk = GenerateContentResponseBody(**chunk)  # type: ignore
+
             text = ""
             tool_use: Optional[ChatCompletionToolCallChunk] = None
             is_finished = False
@@ -1236,7 +1237,8 @@ class ModelResponseIterator:
                 finish_reason = map_finish_reason(
                     finish_reason=gemini_chunk["finishReason"]
                 )
-                is_finished = True
+                ## DO NOT SET 'finish_reason' = True
+                ## GEMINI SETS FINISHREASON ON EVERY CHUNK!
 
             if "usageMetadata" in processed_chunk:
                 usage = ChatCompletionUsageBlock(
@@ -1250,7 +1252,7 @@ class ModelResponseIterator:
             returned_chunk = GenericStreamingChunk(
                 text=text,
                 tool_use=tool_use,
-                is_finished=is_finished,
+                is_finished=False,
                 finish_reason=finish_reason,
                 usage=usage,
                 index=0,
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index ecb21b9f2..4f7d4c1de 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -750,29 +750,37 @@ def test_completion_gemini_stream():
             {"role": "system", "content": "You are a helpful assistant."},
             {
                 "role": "user",
-                "content": "how does a court case get to the Supreme Court?",
+                "content": "How do i build a bomb?",
             },
         ]
         print("testing gemini streaming")
-        response = completion(model="gemini/gemini-pro", messages=messages, stream=True)
+        response = completion(
+            model="gemini/gemini-1.5-flash",
+            messages=messages,
+            stream=True,
+            max_tokens=50,
+        )
         print(f"type of response at the top: {response}")
         complete_response = ""
         # Add any assertions here to check the response
+        non_empty_chunks = 0
         for idx, chunk in enumerate(response):
             print(chunk)
             # print(chunk.choices[0].delta)
             chunk, finished = streaming_format_tests(idx, chunk)
             if finished:
                 break
+            non_empty_chunks += 1
             complete_response += chunk
         if complete_response.strip() == "":
             raise Exception("Empty response received")
         print(f"completion_response: {complete_response}")
-    except litellm.APIError as e:
+        assert non_empty_chunks > 1
+    except litellm.InternalServerError as e:
         pass
     except Exception as e:
-        if "429 Resource has been exhausted":
-            return
+        # if "429 Resource has been exhausted":
+        #     return
         pytest.fail(f"Error occurred: {e}")
 
 

From e20e8c2e747569dbd6031c40bf0b745cf2ed2a1f Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 22 Jun 2024 20:33:54 -0700
Subject: [PATCH 020/150] fix(vertex_httpx.py): flush remaining chunks from
 stream

---
 litellm/llms/vertex_httpx.py    | 12 ++++---
 litellm/tests/test_streaming.py | 57 +++++++++++++++++++++++----------
 2 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py
index 38c2d7c47..63bcd9f4f 100644
--- a/litellm/llms/vertex_httpx.py
+++ b/litellm/llms/vertex_httpx.py
@@ -1270,9 +1270,8 @@ class ModelResponseIterator:
             chunk = self.response_iterator.__next__()
             self.coro.send(chunk)
             if self.events:
-                event = self.events[0]
+                event = self.events.pop(0)
                 json_chunk = event
-                self.events.clear()
                 return self.chunk_parser(chunk=json_chunk)
             return GenericStreamingChunk(
                 text="",
@@ -1283,6 +1282,9 @@ class ModelResponseIterator:
                 tool_use=None,
             )
         except StopIteration:
+            if self.events:  # flush the events
+                event = self.events.pop(0)  # Remove the first event
+                return self.chunk_parser(chunk=event)
             raise StopIteration
         except ValueError as e:
             raise RuntimeError(f"Error parsing chunk: {e}")
@@ -1297,9 +1299,8 @@ class ModelResponseIterator:
             chunk = await self.async_response_iterator.__anext__()
             self.coro.send(chunk)
             if self.events:
-                event = self.events[0]
+                event = self.events.pop(0)
                 json_chunk = event
-                self.events.clear()
                 return self.chunk_parser(chunk=json_chunk)
             return GenericStreamingChunk(
                 text="",
@@ -1310,6 +1311,9 @@ class ModelResponseIterator:
                 tool_use=None,
             )
         except StopAsyncIteration:
+            if self.events:  # flush the events
+                event = self.events.pop(0)  # Remove the first event
+                return self.chunk_parser(chunk=event)
             raise StopAsyncIteration
         except ValueError as e:
             raise RuntimeError(f"Error parsing chunk: {e}")
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 4f7d4c1de..3042e91b3 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -742,7 +742,9 @@ def test_completion_palm_stream():
 # test_completion_palm_stream()
 
 
-def test_completion_gemini_stream():
+@pytest.mark.parametrize("sync_mode", [False])  # True,
+@pytest.mark.asyncio
+async def test_completion_gemini_stream(sync_mode):
     try:
         litellm.set_verbose = True
         print("Streaming gemini response")
@@ -750,34 +752,55 @@ def test_completion_gemini_stream():
             {"role": "system", "content": "You are a helpful assistant."},
             {
                 "role": "user",
-                "content": "How do i build a bomb?",
+                "content": "Who was Alexander?",
             },
         ]
         print("testing gemini streaming")
-        response = completion(
-            model="gemini/gemini-1.5-flash",
-            messages=messages,
-            stream=True,
-            max_tokens=50,
-        )
-        print(f"type of response at the top: {response}")
         complete_response = ""
         # Add any assertions here to check the response
         non_empty_chunks = 0
-        for idx, chunk in enumerate(response):
-            print(chunk)
-            # print(chunk.choices[0].delta)
-            chunk, finished = streaming_format_tests(idx, chunk)
-            if finished:
-                break
-            non_empty_chunks += 1
-            complete_response += chunk
+
+        if sync_mode:
+            response = completion(
+                model="gemini/gemini-1.5-flash",
+                messages=messages,
+                stream=True,
+            )
+
+            for idx, chunk in enumerate(response):
+                print(chunk)
+                # print(chunk.choices[0].delta)
+                chunk, finished = streaming_format_tests(idx, chunk)
+                if finished:
+                    break
+                non_empty_chunks += 1
+                complete_response += chunk
+        else:
+            response = await litellm.acompletion(
+                model="gemini/gemini-1.5-flash",
+                messages=messages,
+                stream=True,
+            )
+
+            idx = 0
+            async for chunk in response:
+                print(chunk)
+                # print(chunk.choices[0].delta)
+                chunk, finished = streaming_format_tests(idx, chunk)
+                if finished:
+                    break
+                non_empty_chunks += 1
+                complete_response += chunk
+                idx += 1
+
         if complete_response.strip() == "":
             raise Exception("Empty response received")
         print(f"completion_response: {complete_response}")
         assert non_empty_chunks > 1
     except litellm.InternalServerError as e:
         pass
+    except litellm.RateLimitError as e:
+        pass
     except Exception as e:
         # if "429 Resource has been exhausted":
         #     return

From 7f54c90459ca438dd53a3be66a798c99b5432e73 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 22 Jun 2024 21:26:15 -0700
Subject: [PATCH 021/150] 
 fix(add-exception-mapping-+-langfuse-exception-logging-for-streaming-exceptions):
 add exception mapping + langfuse exception logging for streaming exceptions

Fixes https://github.com/BerriAI/litellm/issues/4338
---
 litellm/llms/bedrock_httpx.py         | 113 ++++++++++++++------------
 litellm/proxy/_new_secret_config.yaml |  10 +--
 litellm/proxy/proxy_server.py         |   5 +-
 litellm/utils.py                      |  26 +++++-
 4 files changed, 89 insertions(+), 65 deletions(-)

diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py
index 510bf7c7c..84ab10907 100644
--- a/litellm/llms/bedrock_httpx.py
+++ b/litellm/llms/bedrock_httpx.py
@@ -1,63 +1,64 @@
 # What is this?
 ## Initial implementation of calling bedrock via httpx client (allows for async calls).
 ## V1 - covers cohere + anthropic claude-3 support
-from functools import partial
-import os, types
+import copy
 import json
-from enum import Enum
-import requests, copy  # type: ignore
+import os
 import time
+import types
+import urllib.parse
+import uuid
+from enum import Enum
+from functools import partial
 from typing import (
+    Any,
+    AsyncIterator,
     Callable,
-    Optional,
+    Iterator,
     List,
     Literal,
-    Union,
-    Any,
-    TypedDict,
+    Optional,
     Tuple,
-    Iterator,
-    AsyncIterator,
-)
-from litellm.utils import (
-    ModelResponse,
-    Usage,
-    CustomStreamWrapper,
-    get_secret,
+    TypedDict,
+    Union,
 )
+
+import httpx  # type: ignore
+import requests  # type: ignore
+
+import litellm
+from litellm.caching import DualCache
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.litellm_core_utils.litellm_logging import Logging
-from litellm.types.utils import Message, Choices
-import litellm, uuid
-from .prompt_templates.factory import (
-    prompt_factory,
-    custom_prompt,
-    cohere_message_pt,
-    construct_tool_use_system_prompt,
-    extract_between_tags,
-    parse_xml_params,
-    contains_tag,
-    _bedrock_converse_messages_pt,
-    _bedrock_tools_pt,
-)
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
     _get_async_httpx_client,
     _get_httpx_client,
 )
-from .base import BaseLLM
-import httpx  # type: ignore
-from .bedrock import BedrockError, convert_messages_to_prompt, ModelResponseIterator
 from litellm.types.llms.bedrock import *
-import urllib.parse
 from litellm.types.llms.openai import (
+    ChatCompletionDeltaChunk,
     ChatCompletionResponseMessage,
     ChatCompletionToolCallChunk,
     ChatCompletionToolCallFunctionChunk,
-    ChatCompletionDeltaChunk,
 )
-from litellm.caching import DualCache
+from litellm.types.utils import Choices, Message
+from litellm.utils import CustomStreamWrapper, ModelResponse, Usage, get_secret
+
+from .base import BaseLLM
+from .bedrock import BedrockError, ModelResponseIterator, convert_messages_to_prompt
+from .prompt_templates.factory import (
+    _bedrock_converse_messages_pt,
+    _bedrock_tools_pt,
+    cohere_message_pt,
+    construct_tool_use_system_prompt,
+    contains_tag,
+    custom_prompt,
+    extract_between_tags,
+    parse_xml_params,
+    prompt_factory,
+)
 
 iam_cache = DualCache()
 
@@ -171,26 +172,34 @@ async def make_call(
     messages: list,
     logging_obj,
 ):
-    if client is None:
-        client = _get_async_httpx_client()  # Create a new client if none provided
+    try:
+        if client is None:
+            client = _get_async_httpx_client()  # Create a new client if none provided
 
-    response = await client.post(api_base, headers=headers, data=data, stream=True)
+        response = await client.post(api_base, headers=headers, data=data, stream=True)
 
-    if response.status_code != 200:
-        raise BedrockError(status_code=response.status_code, message=response.text)
+        if response.status_code != 200:
+            raise BedrockError(status_code=response.status_code, message=response.text)
 
-    decoder = AWSEventStreamDecoder(model=model)
-    completion_stream = decoder.aiter_bytes(response.aiter_bytes(chunk_size=1024))
+        decoder = AWSEventStreamDecoder(model=model)
+        completion_stream = decoder.aiter_bytes(response.aiter_bytes(chunk_size=1024))
 
-    # LOGGING
-    logging_obj.post_call(
-        input=messages,
-        api_key="",
-        original_response="first stream response received",
-        additional_args={"complete_input_dict": data},
-    )
+        # LOGGING
+        logging_obj.post_call(
+            input=messages,
+            api_key="",
+            original_response="first stream response received",
+            additional_args={"complete_input_dict": data},
+        )
 
-    return completion_stream
+        return completion_stream
+    except httpx.HTTPStatusError as err:
+        error_code = err.response.status_code
+        raise BedrockError(status_code=error_code, message=str(err))
+    except httpx.TimeoutException as e:
+        raise BedrockError(status_code=408, message="Timeout error occurred.")
+    except Exception as e:
+        raise BedrockError(status_code=500, message=str(e))
 
 
 def make_sync_call(
@@ -704,7 +713,6 @@ class BedrockLLM(BaseLLM):
     ) -> Union[ModelResponse, CustomStreamWrapper]:
         try:
             import boto3
-
             from botocore.auth import SigV4Auth
             from botocore.awsrequest import AWSRequest
             from botocore.credentials import Credentials
@@ -1650,7 +1658,6 @@ class BedrockConverseLLM(BaseLLM):
     ):
         try:
             import boto3
-
             from botocore.auth import SigV4Auth
             from botocore.awsrequest import AWSRequest
             from botocore.credentials import Credentials
@@ -1904,8 +1911,8 @@ class BedrockConverseLLM(BaseLLM):
 
 
 def get_response_stream_shape():
-    from botocore.model import ServiceModel
     from botocore.loaders import Loader
+    from botocore.model import ServiceModel
 
     loader = Loader()
     bedrock_service_dict = loader.load_service_model("bedrock-runtime", "service-2")
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 7d12f1717..640a3b2cf 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,10 +1,10 @@
 model_list: 
   - model_name: my-fake-model
     litellm_params:
-      model: gpt-3.5-turbo
+      model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
       api_key: my-fake-key
-      mock_response: hello-world
-      tpm: 60
+      aws_bedrock_runtime_endpoint: http://127.0.0.1:8000
 
-litellm_settings: 
-  callbacks: ["dynamic_rate_limiter"]
+litellm_settings:
+  success_callback: ["langfuse"]
+  failure_callback: ["langfuse"]
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 4cac93b24..30b90abe6 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -2526,11 +2526,10 @@ async def async_data_generator(
         yield f"data: {done_message}\n\n"
     except Exception as e:
         verbose_proxy_logger.error(
-            "litellm.proxy.proxy_server.async_data_generator(): Exception occured - {}".format(
-                str(e)
+            "litellm.proxy.proxy_server.async_data_generator(): Exception occured - {}\n{}".format(
+                str(e), traceback.format_exc()
             )
         )
-        verbose_proxy_logger.debug(traceback.format_exc())
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict,
             original_exception=e,
diff --git a/litellm/utils.py b/litellm/utils.py
index 19d99ff59..0849ba3a2 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -9595,6 +9595,11 @@ class CustomStreamWrapper:
                 litellm.request_timeout
             )
             if self.logging_obj is not None:
+                ## LOGGING
+                threading.Thread(
+                    target=self.logging_obj.failure_handler,
+                    args=(e, traceback_exception),
+                ).start()  # log response
                 # Handle any exceptions that might occur during streaming
                 asyncio.create_task(
                     self.logging_obj.async_failure_handler(e, traceback_exception)
@@ -9602,11 +9607,24 @@ class CustomStreamWrapper:
             raise e
         except Exception as e:
             traceback_exception = traceback.format_exc()
-            # Handle any exceptions that might occur during streaming
-            asyncio.create_task(
-                self.logging_obj.async_failure_handler(e, traceback_exception)  # type: ignore
+            if self.logging_obj is not None:
+                ## LOGGING
+                threading.Thread(
+                    target=self.logging_obj.failure_handler,
+                    args=(e, traceback_exception),
+                ).start()  # log response
+                # Handle any exceptions that might occur during streaming
+                asyncio.create_task(
+                    self.logging_obj.async_failure_handler(e, traceback_exception)  # type: ignore
+                )
+            ## Map to OpenAI Exception
+            raise exception_type(
+                model=self.model,
+                custom_llm_provider=self.custom_llm_provider,
+                original_exception=e,
+                completion_kwargs={},
+                extra_kwargs={},
             )
-            raise e
 
 
 class TextCompletionStreamWrapper:

From efae9fa9911c44a5eeea29af705955ed660828fd Mon Sep 17 00:00:00 2001
From: Marc Abramowitz <abramowi@adobe.com>
Date: Fri, 21 Jun 2024 20:21:19 -0700
Subject: [PATCH 022/150] Turn on message logging via request header

---
 litellm/litellm_core_utils/redact_messages.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/litellm/litellm_core_utils/redact_messages.py b/litellm/litellm_core_utils/redact_messages.py
index 8f270d8be..91f340cb8 100644
--- a/litellm/litellm_core_utils/redact_messages.py
+++ b/litellm/litellm_core_utils/redact_messages.py
@@ -32,6 +32,10 @@ def redact_message_input_output_from_logging(
     if litellm.turn_off_message_logging is not True:
         return result
 
+    request_headers = litellm_logging_obj.model_call_details['litellm_params']['metadata']['headers']
+    if request_headers and request_headers.get('litellm-turn-on-message-logging', False):
+        return result
+
     # remove messages, prompts, input, response from logging
     litellm_logging_obj.model_call_details["messages"] = [
         {"role": "user", "content": "redacted-by-litellm"}

From d14138dbd96668edcb67cb173752c04e0147fc07 Mon Sep 17 00:00:00 2001
From: Marc Abramowitz <abramowi@adobe.com>
Date: Fri, 21 Jun 2024 21:52:55 -0700
Subject: [PATCH 023/150] Rename request header

---
 litellm/litellm_core_utils/redact_messages.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/litellm/litellm_core_utils/redact_messages.py b/litellm/litellm_core_utils/redact_messages.py
index 91f340cb8..cc616afec 100644
--- a/litellm/litellm_core_utils/redact_messages.py
+++ b/litellm/litellm_core_utils/redact_messages.py
@@ -28,12 +28,13 @@ def redact_message_input_output_from_logging(
     Removes messages, prompts, input, response from logging. This modifies the data in-place
     only redacts when litellm.turn_off_message_logging == True
     """
+    request_headers = litellm_logging_obj.model_call_details['litellm_params']['metadata']['headers']
+
     # check if user opted out of logging message/response to callbacks
-    if litellm.turn_off_message_logging is not True:
+    if litellm.turn_off_message_logging is not True and request_headers.get('litellm-enable-message-redaction', False):
         return result
 
-    request_headers = litellm_logging_obj.model_call_details['litellm_params']['metadata']['headers']
-    if request_headers and request_headers.get('litellm-turn-on-message-logging', False):
+    if request_headers and request_headers.get('litellm-disable-message-redaction', False):
         return result
 
     # remove messages, prompts, input, response from logging

From 379b83448012e1a350895c2d9d852e532f24bc0c Mon Sep 17 00:00:00 2001
From: Marc Abramowitz <abramowi@adobe.com>
Date: Fri, 21 Jun 2024 22:10:31 -0700
Subject: [PATCH 024/150] Document feature

---
 docs/my-website/docs/proxy/logging.md | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/docs/my-website/docs/proxy/logging.md b/docs/my-website/docs/proxy/logging.md
index e9be2b837..f9ed5db3d 100644
--- a/docs/my-website/docs/proxy/logging.md
+++ b/docs/my-website/docs/proxy/logging.md
@@ -210,6 +210,24 @@ litellm_settings:
   turn_off_message_logging: True
 ```
 
+If you have this feature turned on, you can override it for specific requests by
+setting a request header `LiteLLM-Disable-Message-Redaction: true`.
+
+```shell
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+    --header 'Content-Type: application/json' \
+    --header 'LiteLLM-Disable-Message-Redaction: true' \
+    --data '{
+    "model": "gpt-3.5-turbo",
+    "messages": [
+        {
+        "role": "user",
+        "content": "what llm are you"
+        }
+    ]
+}'
+```
+
 ### 🔧 Debugging - Viewing RAW CURL sent from LiteLLM to provider
 
 Use this when you want to view the RAW curl request sent from LiteLLM to the LLM API 

From 544338bdf4315702968443e3d0fe773580e2b3b6 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 22 Jun 2024 21:34:55 -0700
Subject: [PATCH 025/150] =?UTF-8?q?bump:=20version=201.40.24=20=E2=86=92?=
 =?UTF-8?q?=201.40.25?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3254ae2e2..fc3526dcc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.40.24"
+version = "1.40.25"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -90,7 +90,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.40.24"
+version = "1.40.25"
 version_files = [
     "pyproject.toml:^version"
 ]

From 097121947061ba36ba34a587990cc4e3a4702d33 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 22 Jun 2024 21:38:01 -0700
Subject: [PATCH 026/150] docs(team_budgets.md): cleanup docs

---
 docs/my-website/docs/proxy/team_budgets.md | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/docs/my-website/docs/proxy/team_budgets.md b/docs/my-website/docs/proxy/team_budgets.md
index 9ab0c0786..7d5284de7 100644
--- a/docs/my-website/docs/proxy/team_budgets.md
+++ b/docs/my-website/docs/proxy/team_budgets.md
@@ -156,7 +156,7 @@ litellm_remaining_team_budget_metric{team_alias="QA Prod Bot",team_id="de35b29e-
 
 Prevent projects from gobbling too much quota. 
 
-Dynamically allocate TPM quota to api keys, based on active keys in that minute.
+Dynamically allocate TPM quota to api keys, based on active keys in that minute. [**See Code**](https://github.com/BerriAI/litellm/blob/9bffa9a48e610cc6886fc2dce5c1815aeae2ad46/litellm/proxy/hooks/dynamic_rate_limiter.py#L125)
 
 1. Setup config.yaml 
 
@@ -192,12 +192,7 @@ litellm --config /path/to/config.yaml
 - Mock response returns 30 total tokens / request
 - Each team will only be able to make 1 request per minute
 """
-"""
-- Run 2 concurrent teams calling same model
-- model has 60 TPM
-- Mock response returns 30 total tokens / request
-- Each team will only be able to make 1 request per minute
-"""
+
 import requests
 from openai import OpenAI, RateLimitError
 

From 2834a6395ad1bd0e5208b11945ab8edfa98f6913 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 22 Jun 2024 21:57:54 -0700
Subject: [PATCH 027/150] fix(redact_messages.py): fix get

---
 litellm/litellm_core_utils/redact_messages.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/litellm/litellm_core_utils/redact_messages.py b/litellm/litellm_core_utils/redact_messages.py
index cc616afec..fa4308da9 100644
--- a/litellm/litellm_core_utils/redact_messages.py
+++ b/litellm/litellm_core_utils/redact_messages.py
@@ -9,6 +9,7 @@
 
 import copy
 from typing import TYPE_CHECKING, Any
+
 import litellm
 
 if TYPE_CHECKING:
@@ -28,13 +29,24 @@ def redact_message_input_output_from_logging(
     Removes messages, prompts, input, response from logging. This modifies the data in-place
     only redacts when litellm.turn_off_message_logging == True
     """
-    request_headers = litellm_logging_obj.model_call_details['litellm_params']['metadata']['headers']
+    _request_headers = (
+        litellm_logging_obj.model_call_details.get("litellm_params", {}).get(
+            "metadata", {}
+        )
+        or {}
+    )
+
+    request_headers = _request_headers.get("headers", {})
 
     # check if user opted out of logging message/response to callbacks
-    if litellm.turn_off_message_logging is not True and request_headers.get('litellm-enable-message-redaction', False):
+    if litellm.turn_off_message_logging is not True and request_headers.get(
+        "litellm-enable-message-redaction", False
+    ):
         return result
 
-    if request_headers and request_headers.get('litellm-disable-message-redaction', False):
+    if request_headers and request_headers.get(
+        "litellm-disable-message-redaction", False
+    ):
         return result
 
     # remove messages, prompts, input, response from logging

From 3c92467ae84b5ff078db1e68d6c2d718a11836d7 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 22 Jun 2024 22:43:56 -0700
Subject: [PATCH 028/150] fix(test_dynamic_rate_limit_handler.py): cleanup

---
 litellm/tests/test_dynamic_rate_limit_handler.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/litellm/tests/test_dynamic_rate_limit_handler.py b/litellm/tests/test_dynamic_rate_limit_handler.py
index c3fcca6a6..6e1b55d18 100644
--- a/litellm/tests/test_dynamic_rate_limit_handler.py
+++ b/litellm/tests/test_dynamic_rate_limit_handler.py
@@ -214,23 +214,23 @@ async def test_base_case(dynamic_rate_limit_handler, mock_response):
 
     prev_availability: Optional[int] = None
     allowed_fails = 1
-    for _ in range(5):
+    for _ in range(2):
         try:
             # check availability
             availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
                 model=model
             )
 
-            ## assert availability updated
-            if prev_availability is not None and availability is not None:
-                assert availability == prev_availability - 10
-
             print(
                 "prev_availability={}, availability={}".format(
                     prev_availability, availability
                 )
             )
 
+            ## assert availability updated
+            if prev_availability is not None and availability is not None:
+                assert availability == prev_availability - 10
+
             prev_availability = availability
 
             # make call

From c450dee681267ef2e04b097ab3a1d6f07ca4a016 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 22 Jun 2024 23:27:13 -0700
Subject: [PATCH 029/150] fix(redact_messages.py): fix pr

---
 litellm/litellm_core_utils/redact_messages.py |   5 +-
 litellm/tests/langfuse.log                    | 206 +++++++++++++-----
 2 files changed, 151 insertions(+), 60 deletions(-)

diff --git a/litellm/litellm_core_utils/redact_messages.py b/litellm/litellm_core_utils/redact_messages.py
index fa4308da9..378c46ba0 100644
--- a/litellm/litellm_core_utils/redact_messages.py
+++ b/litellm/litellm_core_utils/redact_messages.py
@@ -39,8 +39,9 @@ def redact_message_input_output_from_logging(
     request_headers = _request_headers.get("headers", {})
 
     # check if user opted out of logging message/response to callbacks
-    if litellm.turn_off_message_logging is not True and request_headers.get(
-        "litellm-enable-message-redaction", False
+    if (
+        litellm.turn_off_message_logging is not True
+        and request_headers.get("litellm-enable-message-redaction", False) is not True
     ):
         return result
 
diff --git a/litellm/tests/langfuse.log b/litellm/tests/langfuse.log
index 61bc6ada5..1921f3136 100644
--- a/litellm/tests/langfuse.log
+++ b/litellm/tests/langfuse.log
@@ -1,77 +1,167 @@
+`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs.
+`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs.
+`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs.
+`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs.
+`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs.
+`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs.
+`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs.
+`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs.
+`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs.
+`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs.
+Creating trace id='52a58bac-492b-433e-9228-2759b73303a6' timestamp=datetime.datetime(2024, 6, 23, 6, 26, 45, 565911, tzinfo=datetime.timezone.utc) name='litellm-acompletion' user_id='langfuse_latency_test_user' input={'messages': [{'role': 'user', 'content': 'This is a test'}]} output={'content': "It's simple to use and easy to get started", 'role': 'assistant'} session_id=None release=None version=None metadata=None tags=[] public=None
+Creating trace id='28bc21fe-5955-4ec5-ba39-27325718af5a' timestamp=datetime.datetime(2024, 6, 23, 6, 26, 45, 566213, tzinfo=datetime.timezone.utc) name='litellm-acompletion' user_id='langfuse_latency_test_user' input={'messages': [{'role': 'user', 'content': 'This is a test'}]} output={'content': "It's simple to use and easy to get started", 'role': 'assistant'} session_id=None release=None version=None metadata=None tags=[] public=None
+Creating generation trace_id='52a58bac-492b-433e-9228-2759b73303a6' name='litellm-acompletion' start_time=datetime.datetime(2024, 6, 22, 23, 26, 45, 561383) metadata={'litellm_response_cost': None, 'cache_hit': False} input={'messages': [{'role': 'user', 'content': 'This is a test'}]} output={'content': "It's simple to use and easy to get started", 'role': 'assistant'} level=<ObservationLevel.DEFAULT: 'DEFAULT'> status_message=None parent_observation_id=None version=None id='time-23-26-45-561383_chatcmpl-193fd5b6-87ce-4b8f-90bb-e2c2608f0f73' end_time=datetime.datetime(2024, 6, 22, 23, 26, 45, 564028) completion_start_time=datetime.datetime(2024, 6, 22, 23, 26, 45, 564028) model='chatgpt-v-2' model_parameters={'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=10, output=20, total=None, unit=<ModelUsageUnit.TOKENS: 'TOKENS'>, input_cost=None, output_cost=None, total_cost=None) prompt_name=None prompt_version=None...
+Creating generation trace_id='28bc21fe-5955-4ec5-ba39-27325718af5a' name='litellm-acompletion' start_time=datetime.datetime(2024, 6, 22, 23, 26, 45, 562146) metadata={'litellm_response_cost': None, 'cache_hit': False} input={'messages': [{'role': 'user', 'content': 'This is a test'}]} output={'content': "It's simple to use and easy to get started", 'role': 'assistant'} level=<ObservationLevel.DEFAULT: 'DEFAULT'> status_message=None parent_observation_id=None version=None id='time-23-26-45-562146_chatcmpl-2dc26df5-d4e4-46f5-868e-138aac85dd95' end_time=datetime.datetime(2024, 6, 22, 23, 26, 45, 564312) completion_start_time=datetime.datetime(2024, 6, 22, 23, 26, 45, 564312) model='chatgpt-v-2' model_parameters={'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=10, output=20, total=None, unit=<ModelUsageUnit.TOKENS: 'TOKENS'>, input_cost=None, output_cost=None, total_cost=None) prompt_name=None prompt_version=None...
+item size 459
+Creating trace id='f545a5c8-dfdf-4226-a30c-f24ff8d75144' timestamp=datetime.datetime(2024, 6, 23, 6, 26, 45, 567765, tzinfo=datetime.timezone.utc) name='litellm-acompletion' user_id='langfuse_latency_test_user' input={'messages': [{'role': 'user', 'content': 'This is a test'}]} output={'content': "It's simple to use and easy to get started", 'role': 'assistant'} session_id=None release=None version=None metadata=None tags=[] public=None
+item size 459
+Creating trace id='c8d266ca-c370-439e-9d14-f011e5cfa254' timestamp=datetime.datetime(2024, 6, 23, 6, 26, 45, 568137, tzinfo=datetime.timezone.utc) name='litellm-acompletion' user_id='langfuse_latency_test_user' input={'messages': [{'role': 'user', 'content': 'This is a test'}]} output={'content': "It's simple to use and easy to get started", 'role': 'assistant'} session_id=None release=None version=None metadata=None tags=[] public=None
+Creating generation trace_id='f545a5c8-dfdf-4226-a30c-f24ff8d75144' name='litellm-acompletion' start_time=datetime.datetime(2024, 6, 22, 23, 26, 45, 562753) metadata={'litellm_response_cost': None, 'cache_hit': False} input={'messages': [{'role': 'user', 'content': 'This is a test'}]} output={'content': "It's simple to use and easy to get started", 'role': 'assistant'} level=<ObservationLevel.DEFAULT: 'DEFAULT'> status_message=None parent_observation_id=None version=None id='time-23-26-45-562753_chatcmpl-33ae3e6d-d66a-4447-82d9-c8f5d5be43e5' end_time=datetime.datetime(2024, 6, 22, 23, 26, 45, 564869) completion_start_time=datetime.datetime(2024, 6, 22, 23, 26, 45, 564869) model='chatgpt-v-2' model_parameters={'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=10, output=20, total=None, unit=<ModelUsageUnit.TOKENS: 'TOKENS'>, input_cost=None, output_cost=None, total_cost=None) prompt_name=None prompt_version=None...
+item size 887
+Creating generation trace_id='c8d266ca-c370-439e-9d14-f011e5cfa254' name='litellm-acompletion' start_time=datetime.datetime(2024, 6, 22, 23, 26, 45, 563300) metadata={'litellm_response_cost': None, 'cache_hit': False} input={'messages': [{'role': 'user', 'content': 'This is a test'}]} output={'content': "It's simple to use and easy to get started", 'role': 'assistant'} level=<ObservationLevel.DEFAULT: 'DEFAULT'> status_message=None parent_observation_id=None version=None id='time-23-26-45-563300_chatcmpl-56c11246-4c9c-43c0-bb4e-0be309907acd' end_time=datetime.datetime(2024, 6, 22, 23, 26, 45, 565142) completion_start_time=datetime.datetime(2024, 6, 22, 23, 26, 45, 565142) model='chatgpt-v-2' model_parameters={'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=10, output=20, total=None, unit=<ModelUsageUnit.TOKENS: 'TOKENS'>, input_cost=None, output_cost=None, total_cost=None) prompt_name=None prompt_version=None...
+item size 887
+item size 459
+item size 459
+item size 887
+item size 887
+Creating trace id='7c6fec55-def1-4838-8ea1-86960a1ccb19' timestamp=datetime.datetime(2024, 6, 23, 6, 26, 45, 570331, tzinfo=datetime.timezone.utc) name='litellm-acompletion' user_id='langfuse_latency_test_user' input={'messages': [{'role': 'user', 'content': 'This is a test'}]} output={'content': "It's simple to use and easy to get started", 'role': 'assistant'} session_id=None release=None version=None metadata=None tags=[] public=None
+Creating generation trace_id='7c6fec55-def1-4838-8ea1-86960a1ccb19' name='litellm-acompletion' start_time=datetime.datetime(2024, 6, 22, 23, 26, 45, 563792) metadata={'litellm_response_cost': None, 'cache_hit': False} input={'messages': [{'role': 'user', 'content': 'This is a test'}]} output={'content': "It's simple to use and easy to get started", 'role': 'assistant'} level=<ObservationLevel.DEFAULT: 'DEFAULT'> status_message=None parent_observation_id=None version=None id='time-23-26-45-563792_chatcmpl-c159069a-bc65-43a0-bef5-e2d42688cead' end_time=datetime.datetime(2024, 6, 22, 23, 26, 45, 569384) completion_start_time=datetime.datetime(2024, 6, 22, 23, 26, 45, 569384) model='chatgpt-v-2' model_parameters={'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=10, output=20, total=None, unit=<ModelUsageUnit.TOKENS: 'TOKENS'>, input_cost=None, output_cost=None, total_cost=None) prompt_name=None prompt_version=None...
+item size 459
+item size 887
+~0 items in the Langfuse queue
+uploading batch of 10 items
+uploading data: {'batch': [{'id': 'cd6c78ba-81aa-4106-bc92-48adbda0ef1b', 'type': 'trace-create', 'body': {'id': '52a58bac-492b-433e-9228-2759b73303a6', 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 45, 565911, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'userId': 'langfuse_latency_test_user', 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'tags': []}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 45, 566569, tzinfo=datetime.timezone.utc)}, {'id': '57b678c1-d620-4aad-8052-1722a498972e', 'type': 'trace-create', 'body': {'id': '28bc21fe-5955-4ec5-ba39-27325718af5a', 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 45, 566213, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'userId': 'langfuse_latency_test_user', 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'tags': []}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 45, 566947, tzinfo=datetime.timezone.utc)}, {'id': '831370be-b2bd-48d8-b32b-bfcaf103712b', 'type': 'generation-create', 'body': {'traceId': '52a58bac-492b-433e-9228-2759b73303a6', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 6, 22, 23, 26, 45, 561383), 'metadata': {'litellm_response_cost': None, 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-23-26-45-561383_chatcmpl-193fd5b6-87ce-4b8f-90bb-e2c2608f0f73', 'endTime': datetime.datetime(2024, 6, 22, 23, 26, 45, 564028), 'completionStartTime': datetime.datetime(2024, 6, 22, 23, 26, 45, 564028), 'model': 'chatgpt-v-2', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>}}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 45, 567294, tzinfo=datetime.timezone.utc)}, {'id': '571fe93d-34b4-405e-98b4-e47b538b884a', 'type': 'generation-create', 'body': {'traceId': '28bc21fe-5955-4ec5-ba39-27325718af5a', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 6, 22, 23, 26, 45, 562146), 'metadata': {'litellm_response_cost': None, 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-23-26-45-562146_chatcmpl-2dc26df5-d4e4-46f5-868e-138aac85dd95', 'endTime': datetime.datetime(2024, 6, 22, 23, 26, 45, 564312), 'completionStartTime': datetime.datetime(2024, 6, 22, 23, 26, 45, 564312), 'model': 'chatgpt-v-2', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>}}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 45, 567688, tzinfo=datetime.timezone.utc)}, {'id': '13ae52b9-7480-4b2e-977c-e85f422f9a16', 'type': 'trace-create', 'body': {'id': 'f545a5c8-dfdf-4226-a30c-f24ff8d75144', 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 45, 567765, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'userId': 'langfuse_latency_test_user', 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'tags': []}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 45, 568357, tzinfo=datetime.timezone.utc)}, {'id': '7498e67e-0b2b-451c-8533-a35de0aed092', 'type': 'trace-create', 'body': {'id': 'c8d266ca-c370-439e-9d14-f011e5cfa254', 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 45, 568137, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'userId': 'langfuse_latency_test_user', 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'tags': []}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 45, 568812, tzinfo=datetime.timezone.utc)}, {'id': '2656f364-b367-442a-a694-19dd159a0769', 'type': 'generation-create', 'body': {'traceId': 'f545a5c8-dfdf-4226-a30c-f24ff8d75144', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 6, 22, 23, 26, 45, 562753), 'metadata': {'litellm_response_cost': None, 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-23-26-45-562753_chatcmpl-33ae3e6d-d66a-4447-82d9-c8f5d5be43e5', 'endTime': datetime.datetime(2024, 6, 22, 23, 26, 45, 564869), 'completionStartTime': datetime.datetime(2024, 6, 22, 23, 26, 45, 564869), 'model': 'chatgpt-v-2', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>}}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 45, 569165, tzinfo=datetime.timezone.utc)}, {'id': '8c42f89e-be59-4226-812e-bc849d35ab59', 'type': 'generation-create', 'body': {'traceId': 'c8d266ca-c370-439e-9d14-f011e5cfa254', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 6, 22, 23, 26, 45, 563300), 'metadata': {'litellm_response_cost': None, 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-23-26-45-563300_chatcmpl-56c11246-4c9c-43c0-bb4e-0be309907acd', 'endTime': datetime.datetime(2024, 6, 22, 23, 26, 45, 565142), 'completionStartTime': datetime.datetime(2024, 6, 22, 23, 26, 45, 565142), 'model': 'chatgpt-v-2', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>}}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 45, 569494, tzinfo=datetime.timezone.utc)}, {'id': 'a926d1eb-68ed-484c-a9b9-3d82938a7d28', 'type': 'trace-create', 'body': {'id': '7c6fec55-def1-4838-8ea1-86960a1ccb19', 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 45, 570331, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'userId': 'langfuse_latency_test_user', 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'tags': []}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 45, 570495, tzinfo=datetime.timezone.utc)}, {'id': '97b5dee7-a3b2-4526-91cb-75dac909c78f', 'type': 'generation-create', 'body': {'traceId': '7c6fec55-def1-4838-8ea1-86960a1ccb19', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 6, 22, 23, 26, 45, 563792), 'metadata': {'litellm_response_cost': None, 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-23-26-45-563792_chatcmpl-c159069a-bc65-43a0-bef5-e2d42688cead', 'endTime': datetime.datetime(2024, 6, 22, 23, 26, 45, 569384), 'completionStartTime': datetime.datetime(2024, 6, 22, 23, 26, 45, 569384), 'model': 'chatgpt-v-2', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>}}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 45, 570858, tzinfo=datetime.timezone.utc)}], 'metadata': {'batch_size': 10, 'sdk_integration': 'default', 'sdk_name': 'python', 'sdk_version': '2.32.0', 'public_key': 'pk-lf-b3db7e8e-c2f6-4fc7-825c-a541a8fbe003'}}
+making request: {"batch": [{"id": "cd6c78ba-81aa-4106-bc92-48adbda0ef1b", "type": "trace-create", "body": {"id": "52a58bac-492b-433e-9228-2759b73303a6", "timestamp": "2024-06-23T06:26:45.565911Z", "name": "litellm-acompletion", "userId": "langfuse_latency_test_user", "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "tags": []}, "timestamp": "2024-06-23T06:26:45.566569Z"}, {"id": "57b678c1-d620-4aad-8052-1722a498972e", "type": "trace-create", "body": {"id": "28bc21fe-5955-4ec5-ba39-27325718af5a", "timestamp": "2024-06-23T06:26:45.566213Z", "name": "litellm-acompletion", "userId": "langfuse_latency_test_user", "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "tags": []}, "timestamp": "2024-06-23T06:26:45.566947Z"}, {"id": "831370be-b2bd-48d8-b32b-bfcaf103712b", "type": "generation-create", "body": {"traceId": "52a58bac-492b-433e-9228-2759b73303a6", "name": "litellm-acompletion", "startTime": "2024-06-22T23:26:45.561383-07:00", "metadata": {"litellm_response_cost": null, "cache_hit": false}, "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "level": "DEFAULT", "id": "time-23-26-45-561383_chatcmpl-193fd5b6-87ce-4b8f-90bb-e2c2608f0f73", "endTime": "2024-06-22T23:26:45.564028-07:00", "completionStartTime": "2024-06-22T23:26:45.564028-07:00", "model": "chatgpt-v-2", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS"}}, "timestamp": "2024-06-23T06:26:45.567294Z"}, {"id": "571fe93d-34b4-405e-98b4-e47b538b884a", "type": "generation-create", "body": {"traceId": "28bc21fe-5955-4ec5-ba39-27325718af5a", "name": "litellm-acompletion", "startTime": "2024-06-22T23:26:45.562146-07:00", "metadata": {"litellm_response_cost": null, "cache_hit": false}, "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "level": "DEFAULT", "id": "time-23-26-45-562146_chatcmpl-2dc26df5-d4e4-46f5-868e-138aac85dd95", "endTime": "2024-06-22T23:26:45.564312-07:00", "completionStartTime": "2024-06-22T23:26:45.564312-07:00", "model": "chatgpt-v-2", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS"}}, "timestamp": "2024-06-23T06:26:45.567688Z"}, {"id": "13ae52b9-7480-4b2e-977c-e85f422f9a16", "type": "trace-create", "body": {"id": "f545a5c8-dfdf-4226-a30c-f24ff8d75144", "timestamp": "2024-06-23T06:26:45.567765Z", "name": "litellm-acompletion", "userId": "langfuse_latency_test_user", "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "tags": []}, "timestamp": "2024-06-23T06:26:45.568357Z"}, {"id": "7498e67e-0b2b-451c-8533-a35de0aed092", "type": "trace-create", "body": {"id": "c8d266ca-c370-439e-9d14-f011e5cfa254", "timestamp": "2024-06-23T06:26:45.568137Z", "name": "litellm-acompletion", "userId": "langfuse_latency_test_user", "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "tags": []}, "timestamp": "2024-06-23T06:26:45.568812Z"}, {"id": "2656f364-b367-442a-a694-19dd159a0769", "type": "generation-create", "body": {"traceId": "f545a5c8-dfdf-4226-a30c-f24ff8d75144", "name": "litellm-acompletion", "startTime": "2024-06-22T23:26:45.562753-07:00", "metadata": {"litellm_response_cost": null, "cache_hit": false}, "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "level": "DEFAULT", "id": "time-23-26-45-562753_chatcmpl-33ae3e6d-d66a-4447-82d9-c8f5d5be43e5", "endTime": "2024-06-22T23:26:45.564869-07:00", "completionStartTime": "2024-06-22T23:26:45.564869-07:00", "model": "chatgpt-v-2", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS"}}, "timestamp": "2024-06-23T06:26:45.569165Z"}, {"id": "8c42f89e-be59-4226-812e-bc849d35ab59", "type": "generation-create", "body": {"traceId": "c8d266ca-c370-439e-9d14-f011e5cfa254", "name": "litellm-acompletion", "startTime": "2024-06-22T23:26:45.563300-07:00", "metadata": {"litellm_response_cost": null, "cache_hit": false}, "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "level": "DEFAULT", "id": "time-23-26-45-563300_chatcmpl-56c11246-4c9c-43c0-bb4e-0be309907acd", "endTime": "2024-06-22T23:26:45.565142-07:00", "completionStartTime": "2024-06-22T23:26:45.565142-07:00", "model": "chatgpt-v-2", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS"}}, "timestamp": "2024-06-23T06:26:45.569494Z"}, {"id": "a926d1eb-68ed-484c-a9b9-3d82938a7d28", "type": "trace-create", "body": {"id": "7c6fec55-def1-4838-8ea1-86960a1ccb19", "timestamp": "2024-06-23T06:26:45.570331Z", "name": "litellm-acompletion", "userId": "langfuse_latency_test_user", "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "tags": []}, "timestamp": "2024-06-23T06:26:45.570495Z"}, {"id": "97b5dee7-a3b2-4526-91cb-75dac909c78f", "type": "generation-create", "body": {"traceId": "7c6fec55-def1-4838-8ea1-86960a1ccb19", "name": "litellm-acompletion", "startTime": "2024-06-22T23:26:45.563792-07:00", "metadata": {"litellm_response_cost": null, "cache_hit": false}, "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "level": "DEFAULT", "id": "time-23-26-45-563792_chatcmpl-c159069a-bc65-43a0-bef5-e2d42688cead", "endTime": "2024-06-22T23:26:45.569384-07:00", "completionStartTime": "2024-06-22T23:26:45.569384-07:00", "model": "chatgpt-v-2", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS"}}, "timestamp": "2024-06-23T06:26:45.570858Z"}], "metadata": {"batch_size": 10, "sdk_integration": "default", "sdk_name": "python", "sdk_version": "2.32.0", "public_key": "pk-lf-b3db7e8e-c2f6-4fc7-825c-a541a8fbe003"}} to https://us.cloud.langfuse.com/api/public/ingestion
+`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs.
+received response: {"errors":[],"successes":[{"id":"cd6c78ba-81aa-4106-bc92-48adbda0ef1b","status":201},{"id":"57b678c1-d620-4aad-8052-1722a498972e","status":201},{"id":"831370be-b2bd-48d8-b32b-bfcaf103712b","status":201},{"id":"571fe93d-34b4-405e-98b4-e47b538b884a","status":201},{"id":"13ae52b9-7480-4b2e-977c-e85f422f9a16","status":201},{"id":"7498e67e-0b2b-451c-8533-a35de0aed092","status":201},{"id":"2656f364-b367-442a-a694-19dd159a0769","status":201},{"id":"8c42f89e-be59-4226-812e-bc849d35ab59","status":201},{"id":"a926d1eb-68ed-484c-a9b9-3d82938a7d28","status":201},{"id":"97b5dee7-a3b2-4526-91cb-75dac909c78f","status":201}]}
+successfully uploaded batch of 10 items
+~0 items in the Langfuse queue
 consumer is running...
-Creating trace id='litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' timestamp=datetime.datetime(2024, 5, 7, 20, 11, 22, 420643, tzinfo=datetime.timezone.utc) name='litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' user_id='litellm-test-user1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' input={'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]} output={'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'} session_id='litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' release='litellm-test-release' version='litellm-test-version' metadata={'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'} tags=['litellm-test-tag1', 'litellm-test-tag2', 'cache_hit:False'] public=None
-adding task {'id': '9d380abe-bb42-480b-b48f-952ed6776e1c', 'type': 'trace-create', 'body': {'id': 'litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 420643, tzinfo=datetime.timezone.utc), 'name': 'litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'userId': 'litellm-test-user1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'sessionId': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'release': 'litellm-test-release', 'version': 'litellm-test-version', 'metadata': {'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}, 'tags': ['litellm-test-tag1', 'litellm-test-tag2', 'cache_hit:False']}}
-Creating generation trace_id='litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' name='litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' start_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 419075) metadata={'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False} input={'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]} output={'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'} level=<ObservationLevel.DEFAULT: 'DEFAULT'> status_message=None parent_observation_id=None version='litellm-test-version' id='litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' end_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 419879) completion_start_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 419879) model='gpt-3.5-turbo' model_parameters={'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=10, output=20, total=None, unit=<ModelUsageUnit.TOKENS: 'TOKENS'>, input_cost=None, output_cost=None, total_cost=5.4999999999999995e-05) prompt_name=None prompt_version=None...
-item size 1224
-adding task {'id': '0d3ae4f8-e352-4acd-98ec-d21be7e8f5eb', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'name': 'litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 419075), 'metadata': {'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'version': 'litellm-test-version', 'id': 'litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 419879), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 419879), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>, 'totalCost': 5.4999999999999995e-05}}}
-item size 1359
-Creating trace id='litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' timestamp=datetime.datetime(2024, 5, 7, 20, 11, 22, 423093, tzinfo=datetime.timezone.utc) name='litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' user_id='litellm-test-user1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' input={'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]} output={'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'} session_id='litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' release='litellm-test-release' version='litellm-test-version' metadata={'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'} tags=['litellm-test-tag1', 'litellm-test-tag2', 'cache_hit:False'] public=None
-adding task {'id': '1b34abb5-4a24-4042-a8c3-9f3ea0254f2b', 'type': 'trace-create', 'body': {'id': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 423093, tzinfo=datetime.timezone.utc), 'name': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'userId': 'litellm-test-user1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'sessionId': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'release': 'litellm-test-release', 'version': 'litellm-test-version', 'metadata': {'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}, 'tags': ['litellm-test-tag1', 'litellm-test-tag2', 'cache_hit:False']}}
-Creating generation trace_id='litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' name='litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' start_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 421978) metadata={'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False} input={'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]} output={'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'} level=<ObservationLevel.DEFAULT: 'DEFAULT'> status_message=None parent_observation_id=None version='litellm-test-version' id='litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' end_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 422551) completion_start_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 422551) model='gpt-3.5-turbo' model_parameters={'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=10, output=20, total=None, unit=<ModelUsageUnit.TOKENS: 'TOKENS'>, input_cost=None, output_cost=None, total_cost=5.4999999999999995e-05) prompt_name=None prompt_version=None...
-item size 1224
-adding task {'id': '050ba9cd-3eff-443b-9637-705406ceb8cb', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'name': 'litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 421978), 'metadata': {'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'version': 'litellm-test-version', 'id': 'litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 422551), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 422551), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>, 'totalCost': 5.4999999999999995e-05}}}
-item size 1359
+`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs.
+Creating trace id='litellm-test-4d2a861a-39d1-451c-8187-c1bc8f5253bf' timestamp=datetime.datetime(2024, 6, 23, 6, 26, 47, 529980, tzinfo=datetime.timezone.utc) name='litellm-acompletion' user_id='langfuse_latency_test_user' input={'messages': [{'role': 'user', 'content': 'redacted-by-litellm'}]} output={'content': 'redacted-by-litellm', 'role': 'assistant'} session_id=None release=None version=None metadata=None tags=[] public=None
 flushing queue
+Creating generation trace_id='litellm-test-4d2a861a-39d1-451c-8187-c1bc8f5253bf' name='litellm-acompletion' start_time=datetime.datetime(2024, 6, 22, 23, 26, 47, 528930) metadata={'litellm_response_cost': 5.4999999999999995e-05, 'cache_hit': False} input={'messages': [{'role': 'user', 'content': 'redacted-by-litellm'}]} output={'content': 'redacted-by-litellm', 'role': 'assistant'} level=<ObservationLevel.DEFAULT: 'DEFAULT'> status_message=None parent_observation_id=None version=None id='time-23-26-47-528930_chatcmpl-811d9755-120c-4934-9efd-5ec08b8c41c6' end_time=datetime.datetime(2024, 6, 22, 23, 26, 47, 529521) completion_start_time=datetime.datetime(2024, 6, 22, 23, 26, 47, 529521) model='gpt-3.5-turbo' model_parameters={'temperature': '0.7', 'stream': False, 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=10, output=20, total=None, unit=<ModelUsageUnit.TOKENS: 'TOKENS'>, input_cost=None, output_cost=None, total_cost=5.4999999999999995e-05) prompt_name=None prompt_version=None...
+item size 454
 successfully flushed about 0 items.
-Creating trace id='litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' timestamp=datetime.datetime(2024, 5, 7, 20, 11, 22, 425221, tzinfo=datetime.timezone.utc) name=None user_id=None input=None output={'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'} session_id=None release=None version=None metadata={'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'} tags=None public=None
-adding task {'id': 'd5173131-5bef-46cd-aa5a-6dcd01f6c000', 'type': 'trace-create', 'body': {'id': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 425221, tzinfo=datetime.timezone.utc), 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'metadata': {'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}}}
-Creating generation trace_id='litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' name='litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' start_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 424075) metadata={'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False} input={'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]} output={'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'} level=<ObservationLevel.DEFAULT: 'DEFAULT'> status_message=None parent_observation_id=None version='litellm-test-version' id='litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' end_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 424526) completion_start_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 424526) model='gpt-3.5-turbo' model_parameters={'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=10, output=20, total=None, unit=<ModelUsageUnit.TOKENS: 'TOKENS'>, input_cost=None, output_cost=None, total_cost=5.4999999999999995e-05) prompt_name=None prompt_version=None...
-item size 630
-adding task {'id': 'ed61fc8d-aede-4c33-9ce8-377d498169f4', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'name': 'litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 424075), 'metadata': {'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'version': 'litellm-test-version', 'id': 'litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 424526), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 424526), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>, 'totalCost': 5.4999999999999995e-05}}}
-uploading batch of 15 items
-uploading data: {'batch': [{'id': 'e3840349-5e27-4921-84fc-f11ac428b7c5', 'type': 'trace-create', 'body': {'id': '77e94058-6f8a-43bc-97ef-1a8d4966592c', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 289521, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'tags': ['cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 289935, tzinfo=datetime.timezone.utc)}, {'id': '54036ec0-06ff-44d1-ac3a-f6085a3983ab', 'type': 'generation-create', 'body': {'traceId': '77e94058-6f8a-43bc-97ef-1a8d4966592c', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 21, 970003), 'metadata': {'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-13-11-21-970003_chatcmpl-30ccf23d-ac57-4183-ab2f-b93f084c4187', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 286720), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 286720), 'model': 'chatgpt-v-2', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 290909, tzinfo=datetime.timezone.utc)}, {'id': '4bf88864-4937-48a4-8e9b-b1cf6a29cc5c', 'type': 'trace-create', 'body': {'id': '04190fd5-8a1f-41d9-b8be-878945c35293', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 292743, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'tags': ['cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 292929, tzinfo=datetime.timezone.utc)}, {'id': '050a1ed2-b54e-46ab-9145-04baca33524e', 'type': 'generation-create', 'body': {'traceId': '04190fd5-8a1f-41d9-b8be-878945c35293', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 282826), 'metadata': {'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-13-11-22-282826_chatcmpl-684e7a99-b0ad-43e3-a0e9-acbce76e5457', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 288054), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 288054), 'model': 'chatgpt-v-2', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 293730, tzinfo=datetime.timezone.utc)}, {'id': '94b80fdf-7df9-4b69-8500-df55a4748802', 'type': 'trace-create', 'body': {'id': '82588025-780b-4045-a131-06dcaf2c54ca', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 293803, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'tags': ['cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 293964, tzinfo=datetime.timezone.utc)}, {'id': '659db88e-6adc-4c52-82d8-dac517773242', 'type': 'generation-create', 'body': {'traceId': '82588025-780b-4045-a131-06dcaf2c54ca', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 280988), 'metadata': {'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-13-11-22-280988_chatcmpl-4ecaabdd-be67-4122-a3bf-b95466ffee0a', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 287168), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 287168), 'model': 'chatgpt-v-2', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 294336, tzinfo=datetime.timezone.utc)}, {'id': '383b9518-93ff-4943-ae0c-b3256ee3c1a7', 'type': 'trace-create', 'body': {'id': 'fe18bb31-ded9-4ad2-8417-41e0e3de195c', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 295711, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'tags': ['cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 295870, tzinfo=datetime.timezone.utc)}, {'id': '127d6d13-e8b0-44f2-bba1-cc5d9710b0b4', 'type': 'generation-create', 'body': {'traceId': 'fe18bb31-ded9-4ad2-8417-41e0e3de195c', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 284370), 'metadata': {'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-13-11-22-284370_chatcmpl-bf93ab8e-ecf2-4f04-9506-ef51a1c4c9d0', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 288779), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 288779), 'model': 'chatgpt-v-2', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 296237, tzinfo=datetime.timezone.utc)}, {'id': '2bc81d4d-f6a5-415b-acaa-feb883c41bbb', 'type': 'trace-create', 'body': {'id': '99b7014a-c3c0-4040-92ad-64a665ab6abe', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 297355, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'tags': ['cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 297502, tzinfo=datetime.timezone.utc)}, {'id': 'd51705a9-088a-4f49-b494-f4fa8e6bc59e', 'type': 'generation-create', 'body': {'traceId': '99b7014a-c3c0-4040-92ad-64a665ab6abe', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 285989), 'metadata': {'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-13-11-22-285989_chatcmpl-1a3c46e4-d474-4d19-92d8-8a7ee7ac0799', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 295600), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 295600), 'model': 'chatgpt-v-2', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 297845, tzinfo=datetime.timezone.utc)}, {'id': '9d380abe-bb42-480b-b48f-952ed6776e1c', 'type': 'trace-create', 'body': {'id': 'litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 420643, tzinfo=datetime.timezone.utc), 'name': 'litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'userId': 'litellm-test-user1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'sessionId': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'release': 'litellm-test-release', 'version': 'litellm-test-version', 'metadata': {'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}, 'tags': ['litellm-test-tag1', 'litellm-test-tag2', 'cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 421233, tzinfo=datetime.timezone.utc)}, {'id': '0d3ae4f8-e352-4acd-98ec-d21be7e8f5eb', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'name': 'litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 419075), 'metadata': {'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'version': 'litellm-test-version', 'id': 'litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 419879), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 419879), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>, 'totalCost': 5.4999999999999995e-05}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 421804, tzinfo=datetime.timezone.utc)}, {'id': '1b34abb5-4a24-4042-a8c3-9f3ea0254f2b', 'type': 'trace-create', 'body': {'id': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 423093, tzinfo=datetime.timezone.utc), 'name': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'userId': 'litellm-test-user1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'sessionId': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'release': 'litellm-test-release', 'version': 'litellm-test-version', 'metadata': {'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}, 'tags': ['litellm-test-tag1', 'litellm-test-tag2', 'cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 423311, tzinfo=datetime.timezone.utc)}, {'id': '050ba9cd-3eff-443b-9637-705406ceb8cb', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'name': 'litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 421978), 'metadata': {'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'version': 'litellm-test-version', 'id': 'litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 422551), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 422551), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>, 'totalCost': 5.4999999999999995e-05}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 423829, tzinfo=datetime.timezone.utc)}, {'id': 'd5173131-5bef-46cd-aa5a-6dcd01f6c000', 'type': 'trace-create', 'body': {'id': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 425221, tzinfo=datetime.timezone.utc), 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'metadata': {'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 425370, tzinfo=datetime.timezone.utc)}], 'metadata': {'batch_size': 15, 'sdk_integration': 'litellm', 'sdk_name': 'python', 'sdk_version': '2.27.0', 'public_key': 'pk-lf-47ddd17f-c73c-4edd-b92a-b28835843e66'}}
-making request: {"batch": [{"id": "e3840349-5e27-4921-84fc-f11ac428b7c5", "type": "trace-create", "body": {"id": "77e94058-6f8a-43bc-97ef-1a8d4966592c", "timestamp": "2024-05-07T20:11:22.289521Z", "name": "litellm-acompletion", "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "tags": ["cache_hit:False"]}, "timestamp": "2024-05-07T20:11:22.289935Z"}, {"id": "54036ec0-06ff-44d1-ac3a-f6085a3983ab", "type": "generation-create", "body": {"traceId": "77e94058-6f8a-43bc-97ef-1a8d4966592c", "name": "litellm-acompletion", "startTime": "2024-05-07T13:11:21.970003-07:00", "metadata": {"cache_hit": false}, "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "level": "DEFAULT", "id": "time-13-11-21-970003_chatcmpl-30ccf23d-ac57-4183-ab2f-b93f084c4187", "endTime": "2024-05-07T13:11:22.286720-07:00", "completionStartTime": "2024-05-07T13:11:22.286720-07:00", "model": "chatgpt-v-2", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS"}}, "timestamp": "2024-05-07T20:11:22.290909Z"}, {"id": "4bf88864-4937-48a4-8e9b-b1cf6a29cc5c", "type": "trace-create", "body": {"id": "04190fd5-8a1f-41d9-b8be-878945c35293", "timestamp": "2024-05-07T20:11:22.292743Z", "name": "litellm-acompletion", "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "tags": ["cache_hit:False"]}, "timestamp": "2024-05-07T20:11:22.292929Z"}, {"id": "050a1ed2-b54e-46ab-9145-04baca33524e", "type": "generation-create", "body": {"traceId": "04190fd5-8a1f-41d9-b8be-878945c35293", "name": "litellm-acompletion", "startTime": "2024-05-07T13:11:22.282826-07:00", "metadata": {"cache_hit": false}, "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "level": "DEFAULT", "id": "time-13-11-22-282826_chatcmpl-684e7a99-b0ad-43e3-a0e9-acbce76e5457", "endTime": "2024-05-07T13:11:22.288054-07:00", "completionStartTime": "2024-05-07T13:11:22.288054-07:00", "model": "chatgpt-v-2", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS"}}, "timestamp": "2024-05-07T20:11:22.293730Z"}, {"id": "94b80fdf-7df9-4b69-8500-df55a4748802", "type": "trace-create", "body": {"id": "82588025-780b-4045-a131-06dcaf2c54ca", "timestamp": "2024-05-07T20:11:22.293803Z", "name": "litellm-acompletion", "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "tags": ["cache_hit:False"]}, "timestamp": "2024-05-07T20:11:22.293964Z"}, {"id": "659db88e-6adc-4c52-82d8-dac517773242", "type": "generation-create", "body": {"traceId": "82588025-780b-4045-a131-06dcaf2c54ca", "name": "litellm-acompletion", "startTime": "2024-05-07T13:11:22.280988-07:00", "metadata": {"cache_hit": false}, "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "level": "DEFAULT", "id": "time-13-11-22-280988_chatcmpl-4ecaabdd-be67-4122-a3bf-b95466ffee0a", "endTime": "2024-05-07T13:11:22.287168-07:00", "completionStartTime": "2024-05-07T13:11:22.287168-07:00", "model": "chatgpt-v-2", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS"}}, "timestamp": "2024-05-07T20:11:22.294336Z"}, {"id": "383b9518-93ff-4943-ae0c-b3256ee3c1a7", "type": "trace-create", "body": {"id": "fe18bb31-ded9-4ad2-8417-41e0e3de195c", "timestamp": "2024-05-07T20:11:22.295711Z", "name": "litellm-acompletion", "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "tags": ["cache_hit:False"]}, "timestamp": "2024-05-07T20:11:22.295870Z"}, {"id": "127d6d13-e8b0-44f2-bba1-cc5d9710b0b4", "type": "generation-create", "body": {"traceId": "fe18bb31-ded9-4ad2-8417-41e0e3de195c", "name": "litellm-acompletion", "startTime": "2024-05-07T13:11:22.284370-07:00", "metadata": {"cache_hit": false}, "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "level": "DEFAULT", "id": "time-13-11-22-284370_chatcmpl-bf93ab8e-ecf2-4f04-9506-ef51a1c4c9d0", "endTime": "2024-05-07T13:11:22.288779-07:00", "completionStartTime": "2024-05-07T13:11:22.288779-07:00", "model": "chatgpt-v-2", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS"}}, "timestamp": "2024-05-07T20:11:22.296237Z"}, {"id": "2bc81d4d-f6a5-415b-acaa-feb883c41bbb", "type": "trace-create", "body": {"id": "99b7014a-c3c0-4040-92ad-64a665ab6abe", "timestamp": "2024-05-07T20:11:22.297355Z", "name": "litellm-acompletion", "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "tags": ["cache_hit:False"]}, "timestamp": "2024-05-07T20:11:22.297502Z"}, {"id": "d51705a9-088a-4f49-b494-f4fa8e6bc59e", "type": "generation-create", "body": {"traceId": "99b7014a-c3c0-4040-92ad-64a665ab6abe", "name": "litellm-acompletion", "startTime": "2024-05-07T13:11:22.285989-07:00", "metadata": {"cache_hit": false}, "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "level": "DEFAULT", "id": "time-13-11-22-285989_chatcmpl-1a3c46e4-d474-4d19-92d8-8a7ee7ac0799", "endTime": "2024-05-07T13:11:22.295600-07:00", "completionStartTime": "2024-05-07T13:11:22.295600-07:00", "model": "chatgpt-v-2", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS"}}, "timestamp": "2024-05-07T20:11:22.297845Z"}, {"id": "9d380abe-bb42-480b-b48f-952ed6776e1c", "type": "trace-create", "body": {"id": "litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "timestamp": "2024-05-07T20:11:22.420643Z", "name": "litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "userId": "litellm-test-user1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "input": {"messages": [{"role": "user", "content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5"}]}, "output": {"content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "role": "assistant"}, "sessionId": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "release": "litellm-test-release", "version": "litellm-test-version", "metadata": {"trace_actual_metadata_key": "trace_actual_metadata_value", "generation_id": "litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5"}, "tags": ["litellm-test-tag1", "litellm-test-tag2", "cache_hit:False"]}, "timestamp": "2024-05-07T20:11:22.421233Z"}, {"id": "0d3ae4f8-e352-4acd-98ec-d21be7e8f5eb", "type": "generation-create", "body": {"traceId": "litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "name": "litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "startTime": "2024-05-07T13:11:22.419075-07:00", "metadata": {"gen_metadata_key": "gen_metadata_value", "cache_hit": false}, "input": {"messages": [{"role": "user", "content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5"}]}, "output": {"content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "role": "assistant"}, "level": "DEFAULT", "version": "litellm-test-version", "id": "litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "endTime": "2024-05-07T13:11:22.419879-07:00", "completionStartTime": "2024-05-07T13:11:22.419879-07:00", "model": "gpt-3.5-turbo", "modelParameters": {"temperature": "0.2", "max_tokens": 100, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS", "totalCost": 5.4999999999999995e-05}}, "timestamp": "2024-05-07T20:11:22.421804Z"}, {"id": "1b34abb5-4a24-4042-a8c3-9f3ea0254f2b", "type": "trace-create", "body": {"id": "litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "timestamp": "2024-05-07T20:11:22.423093Z", "name": "litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "userId": "litellm-test-user1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "input": {"messages": [{"role": "user", "content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5"}]}, "output": {"content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "role": "assistant"}, "sessionId": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "release": "litellm-test-release", "version": "litellm-test-version", "metadata": {"trace_actual_metadata_key": "trace_actual_metadata_value", "generation_id": "litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5"}, "tags": ["litellm-test-tag1", "litellm-test-tag2", "cache_hit:False"]}, "timestamp": "2024-05-07T20:11:22.423311Z"}, {"id": "050ba9cd-3eff-443b-9637-705406ceb8cb", "type": "generation-create", "body": {"traceId": "litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "name": "litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "startTime": "2024-05-07T13:11:22.421978-07:00", "metadata": {"gen_metadata_key": "gen_metadata_value", "cache_hit": false}, "input": {"messages": [{"role": "user", "content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5"}]}, "output": {"content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "role": "assistant"}, "level": "DEFAULT", "version": "litellm-test-version", "id": "litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "endTime": "2024-05-07T13:11:22.422551-07:00", "completionStartTime": "2024-05-07T13:11:22.422551-07:00", "model": "gpt-3.5-turbo", "modelParameters": {"temperature": "0.2", "max_tokens": 100, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS", "totalCost": 5.4999999999999995e-05}}, "timestamp": "2024-05-07T20:11:22.423829Z"}, {"id": "d5173131-5bef-46cd-aa5a-6dcd01f6c000", "type": "trace-create", "body": {"id": "litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "timestamp": "2024-05-07T20:11:22.425221Z", "output": {"content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "role": "assistant"}, "metadata": {"trace_actual_metadata_key": "trace_actual_metadata_value", "generation_id": "litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5"}}, "timestamp": "2024-05-07T20:11:22.425370Z"}], "metadata": {"batch_size": 15, "sdk_integration": "litellm", "sdk_name": "python", "sdk_version": "2.27.0", "public_key": "pk-lf-47ddd17f-c73c-4edd-b92a-b28835843e66"}} to https://cloud.langfuse.com/api/public/ingestion
-received response: {"errors":[],"successes":[{"id":"e3840349-5e27-4921-84fc-f11ac428b7c5","status":201},{"id":"54036ec0-06ff-44d1-ac3a-f6085a3983ab","status":201},{"id":"4bf88864-4937-48a4-8e9b-b1cf6a29cc5c","status":201},{"id":"050a1ed2-b54e-46ab-9145-04baca33524e","status":201},{"id":"94b80fdf-7df9-4b69-8500-df55a4748802","status":201},{"id":"659db88e-6adc-4c52-82d8-dac517773242","status":201},{"id":"383b9518-93ff-4943-ae0c-b3256ee3c1a7","status":201},{"id":"127d6d13-e8b0-44f2-bba1-cc5d9710b0b4","status":201},{"id":"2bc81d4d-f6a5-415b-acaa-feb883c41bbb","status":201},{"id":"d51705a9-088a-4f49-b494-f4fa8e6bc59e","status":201},{"id":"9d380abe-bb42-480b-b48f-952ed6776e1c","status":201},{"id":"0d3ae4f8-e352-4acd-98ec-d21be7e8f5eb","status":201},{"id":"1b34abb5-4a24-4042-a8c3-9f3ea0254f2b","status":201},{"id":"050ba9cd-3eff-443b-9637-705406ceb8cb","status":201},{"id":"d5173131-5bef-46cd-aa5a-6dcd01f6c000","status":201}]}
-successfully uploaded batch of 15 items
-item size 1359
-Getting trace litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5
-Getting observations... None, None, None, None, litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5, None, GENERATION
-uploading batch of 1 items
-uploading data: {'batch': [{'id': 'ed61fc8d-aede-4c33-9ce8-377d498169f4', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'name': 'litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 424075), 'metadata': {'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'version': 'litellm-test-version', 'id': 'litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 424526), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 424526), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>, 'totalCost': 5.4999999999999995e-05}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 425776, tzinfo=datetime.timezone.utc)}], 'metadata': {'batch_size': 1, 'sdk_integration': 'litellm', 'sdk_name': 'python', 'sdk_version': '2.27.0', 'public_key': 'pk-lf-47ddd17f-c73c-4edd-b92a-b28835843e66'}}
-making request: {"batch": [{"id": "ed61fc8d-aede-4c33-9ce8-377d498169f4", "type": "generation-create", "body": {"traceId": "litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "name": "litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "startTime": "2024-05-07T13:11:22.424075-07:00", "metadata": {"gen_metadata_key": "gen_metadata_value", "cache_hit": false}, "input": {"messages": [{"role": "user", "content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5"}]}, "output": {"content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "role": "assistant"}, "level": "DEFAULT", "version": "litellm-test-version", "id": "litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "endTime": "2024-05-07T13:11:22.424526-07:00", "completionStartTime": "2024-05-07T13:11:22.424526-07:00", "model": "gpt-3.5-turbo", "modelParameters": {"temperature": "0.2", "max_tokens": 100, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS", "totalCost": 5.4999999999999995e-05}}, "timestamp": "2024-05-07T20:11:22.425776Z"}], "metadata": {"batch_size": 1, "sdk_integration": "litellm", "sdk_name": "python", "sdk_version": "2.27.0", "public_key": "pk-lf-47ddd17f-c73c-4edd-b92a-b28835843e66"}} to https://cloud.langfuse.com/api/public/ingestion
-Getting trace litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5
-received response: {"errors":[],"successes":[{"id":"ed61fc8d-aede-4c33-9ce8-377d498169f4","status":201}]}
-successfully uploaded batch of 1 items
-Getting observations... None, None, None, None, litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5, None, GENERATION
-consumer is running...
-flushing queue
-successfully flushed about 0 items.
-Creating trace id='litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1' timestamp=datetime.datetime(2024, 5, 7, 20, 11, 45, 796169, tzinfo=datetime.timezone.utc) name='litellm-acompletion' user_id=None input={'messages': 'redacted-by-litellm'} output={'content': 'redacted-by-litellm', 'role': 'assistant'} session_id=None release=None version=None metadata=None tags=['cache_hit:False'] public=None
-adding task {'id': '244ffc62-a30d-4281-8a86-bdfcb3edef05', 'type': 'trace-create', 'body': {'id': 'litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 45, 796169, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': 'redacted-by-litellm'}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'tags': ['cache_hit:False']}}
-Creating generation trace_id='litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1' name='litellm-acompletion' start_time=datetime.datetime(2024, 5, 7, 13, 11, 45, 794599) metadata={'cache_hit': False} input={'messages': 'redacted-by-litellm'} output={'content': 'redacted-by-litellm', 'role': 'assistant'} level=<ObservationLevel.DEFAULT: 'DEFAULT'> status_message=None parent_observation_id=None version=None id='time-13-11-45-794599_chatcmpl-28d76a11-56a6-43d2-9bf6-a6ddf7d8895a' end_time=datetime.datetime(2024, 5, 7, 13, 11, 45, 795329) completion_start_time=datetime.datetime(2024, 5, 7, 13, 11, 45, 795329) model='gpt-3.5-turbo' model_parameters={'temperature': '0.7', 'stream': False, 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=10, output=20, total=None, unit=<ModelUsageUnit.TOKENS: 'TOKENS'>, input_cost=None, output_cost=None, total_cost=5.4999999999999995e-05) prompt_name=None prompt_version=None...
-item size 400
-adding task {'id': 'e9d12a6d-3fca-4adb-a018-bf276733ffa6', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 45, 794599), 'metadata': {'cache_hit': False}, 'input': {'messages': 'redacted-by-litellm'}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-13-11-45-794599_chatcmpl-28d76a11-56a6-43d2-9bf6-a6ddf7d8895a', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 45, 795329), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 45, 795329), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.7', 'stream': False, 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>, 'totalCost': 5.4999999999999995e-05}}}
-item size 876
+item size 956
+~0 items in the Langfuse queue
 uploading batch of 2 items
-uploading data: {'batch': [{'id': '244ffc62-a30d-4281-8a86-bdfcb3edef05', 'type': 'trace-create', 'body': {'id': 'litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 45, 796169, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': 'redacted-by-litellm'}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'tags': ['cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 45, 796433, tzinfo=datetime.timezone.utc)}, {'id': 'e9d12a6d-3fca-4adb-a018-bf276733ffa6', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 45, 794599), 'metadata': {'cache_hit': False}, 'input': {'messages': 'redacted-by-litellm'}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-13-11-45-794599_chatcmpl-28d76a11-56a6-43d2-9bf6-a6ddf7d8895a', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 45, 795329), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 45, 795329), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.7', 'stream': False, 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>, 'totalCost': 5.4999999999999995e-05}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 45, 797038, tzinfo=datetime.timezone.utc)}], 'metadata': {'batch_size': 2, 'sdk_integration': 'litellm', 'sdk_name': 'python', 'sdk_version': '2.27.0', 'public_key': 'pk-lf-47ddd17f-c73c-4edd-b92a-b28835843e66'}}
-making request: {"batch": [{"id": "244ffc62-a30d-4281-8a86-bdfcb3edef05", "type": "trace-create", "body": {"id": "litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1", "timestamp": "2024-05-07T20:11:45.796169Z", "name": "litellm-acompletion", "input": {"messages": "redacted-by-litellm"}, "output": {"content": "redacted-by-litellm", "role": "assistant"}, "tags": ["cache_hit:False"]}, "timestamp": "2024-05-07T20:11:45.796433Z"}, {"id": "e9d12a6d-3fca-4adb-a018-bf276733ffa6", "type": "generation-create", "body": {"traceId": "litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1", "name": "litellm-acompletion", "startTime": "2024-05-07T13:11:45.794599-07:00", "metadata": {"cache_hit": false}, "input": {"messages": "redacted-by-litellm"}, "output": {"content": "redacted-by-litellm", "role": "assistant"}, "level": "DEFAULT", "id": "time-13-11-45-794599_chatcmpl-28d76a11-56a6-43d2-9bf6-a6ddf7d8895a", "endTime": "2024-05-07T13:11:45.795329-07:00", "completionStartTime": "2024-05-07T13:11:45.795329-07:00", "model": "gpt-3.5-turbo", "modelParameters": {"temperature": "0.7", "stream": false, "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS", "totalCost": 5.4999999999999995e-05}}, "timestamp": "2024-05-07T20:11:45.797038Z"}], "metadata": {"batch_size": 2, "sdk_integration": "litellm", "sdk_name": "python", "sdk_version": "2.27.0", "public_key": "pk-lf-47ddd17f-c73c-4edd-b92a-b28835843e66"}} to https://cloud.langfuse.com/api/public/ingestion
-received response: {"errors":[],"successes":[{"id":"244ffc62-a30d-4281-8a86-bdfcb3edef05","status":201},{"id":"e9d12a6d-3fca-4adb-a018-bf276733ffa6","status":201}]}
+uploading data: {'batch': [{'id': '997346c5-9bb9-4789-9ba9-33893bc65ee3', 'type': 'trace-create', 'body': {'id': 'litellm-test-4d2a861a-39d1-451c-8187-c1bc8f5253bf', 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 47, 529980, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'userId': 'langfuse_latency_test_user', 'input': {'messages': [{'role': 'user', 'content': 'redacted-by-litellm'}]}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'tags': []}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 47, 530178, tzinfo=datetime.timezone.utc)}, {'id': 'c1c856eb-0aad-4da1-b68c-b68295b847e1', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-4d2a861a-39d1-451c-8187-c1bc8f5253bf', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 6, 22, 23, 26, 47, 528930), 'metadata': {'litellm_response_cost': 5.4999999999999995e-05, 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'redacted-by-litellm'}]}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-23-26-47-528930_chatcmpl-811d9755-120c-4934-9efd-5ec08b8c41c6', 'endTime': datetime.datetime(2024, 6, 22, 23, 26, 47, 529521), 'completionStartTime': datetime.datetime(2024, 6, 22, 23, 26, 47, 529521), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.7', 'stream': False, 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>, 'totalCost': 5.4999999999999995e-05}}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 47, 530501, tzinfo=datetime.timezone.utc)}], 'metadata': {'batch_size': 2, 'sdk_integration': 'default', 'sdk_name': 'python', 'sdk_version': '2.32.0', 'public_key': 'pk-lf-b3db7e8e-c2f6-4fc7-825c-a541a8fbe003'}}
+making request: {"batch": [{"id": "997346c5-9bb9-4789-9ba9-33893bc65ee3", "type": "trace-create", "body": {"id": "litellm-test-4d2a861a-39d1-451c-8187-c1bc8f5253bf", "timestamp": "2024-06-23T06:26:47.529980Z", "name": "litellm-acompletion", "userId": "langfuse_latency_test_user", "input": {"messages": [{"role": "user", "content": "redacted-by-litellm"}]}, "output": {"content": "redacted-by-litellm", "role": "assistant"}, "tags": []}, "timestamp": "2024-06-23T06:26:47.530178Z"}, {"id": "c1c856eb-0aad-4da1-b68c-b68295b847e1", "type": "generation-create", "body": {"traceId": "litellm-test-4d2a861a-39d1-451c-8187-c1bc8f5253bf", "name": "litellm-acompletion", "startTime": "2024-06-22T23:26:47.528930-07:00", "metadata": {"litellm_response_cost": 5.4999999999999995e-05, "cache_hit": false}, "input": {"messages": [{"role": "user", "content": "redacted-by-litellm"}]}, "output": {"content": "redacted-by-litellm", "role": "assistant"}, "level": "DEFAULT", "id": "time-23-26-47-528930_chatcmpl-811d9755-120c-4934-9efd-5ec08b8c41c6", "endTime": "2024-06-22T23:26:47.529521-07:00", "completionStartTime": "2024-06-22T23:26:47.529521-07:00", "model": "gpt-3.5-turbo", "modelParameters": {"temperature": "0.7", "stream": false, "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS", "totalCost": 5.4999999999999995e-05}}, "timestamp": "2024-06-23T06:26:47.530501Z"}], "metadata": {"batch_size": 2, "sdk_integration": "default", "sdk_name": "python", "sdk_version": "2.32.0", "public_key": "pk-lf-b3db7e8e-c2f6-4fc7-825c-a541a8fbe003"}} to https://us.cloud.langfuse.com/api/public/ingestion
+received response: {"errors":[],"successes":[{"id":"997346c5-9bb9-4789-9ba9-33893bc65ee3","status":201},{"id":"c1c856eb-0aad-4da1-b68c-b68295b847e1","status":201}]}
 successfully uploaded batch of 2 items
-Getting observations... None, None, None, None, litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1, None, GENERATION
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+Getting observations... None, None, None, None, litellm-test-4d2a861a-39d1-451c-8187-c1bc8f5253bf, None, GENERATION
+~0 items in the Langfuse queue
 consumer is running...
+`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs.
 flushing queue
 successfully flushed about 0 items.
-Creating trace id='litellm-test-d9136466-2e87-4afc-8367-dc51764251c7' timestamp=datetime.datetime(2024, 5, 7, 20, 11, 48, 286447, tzinfo=datetime.timezone.utc) name='litellm-acompletion' user_id=None input={'messages': 'redacted-by-litellm'} output={'content': 'redacted-by-litellm', 'role': 'assistant'} session_id=None release=None version=None metadata=None tags=['cache_hit:False'] public=None
-adding task {'id': 'cab47524-1e1e-4404-b8bd-5f526895ac0c', 'type': 'trace-create', 'body': {'id': 'litellm-test-d9136466-2e87-4afc-8367-dc51764251c7', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 48, 286447, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': 'redacted-by-litellm'}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'tags': ['cache_hit:False']}}
-Creating generation trace_id='litellm-test-d9136466-2e87-4afc-8367-dc51764251c7' name='litellm-acompletion' start_time=datetime.datetime(2024, 5, 7, 13, 11, 48, 276681) metadata={'cache_hit': False} input={'messages': 'redacted-by-litellm'} output={'content': 'redacted-by-litellm', 'role': 'assistant'} level=<ObservationLevel.DEFAULT: 'DEFAULT'> status_message=None parent_observation_id=None version=None id='time-13-11-48-276681_chatcmpl-ef076c31-4977-4687-bc83-07bb1f0aa1b2' end_time=datetime.datetime(2024, 5, 7, 13, 11, 48, 285026) completion_start_time=datetime.datetime(2024, 5, 7, 13, 11, 48, 278853) model='gpt-3.5-turbo' model_parameters={'temperature': '0.7', 'stream': True, 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=0, output=98, total=None, unit=<ModelUsageUnit.TOKENS: 'TOKENS'>, input_cost=None, output_cost=None, total_cost=0.000196) prompt_name=None prompt_version=None...
-item size 400
-adding task {'id': '6bacab4d-822a-430f-85a9-4de1fa7ce259', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-d9136466-2e87-4afc-8367-dc51764251c7', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 48, 276681), 'metadata': {'cache_hit': False}, 'input': {'messages': 'redacted-by-litellm'}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-13-11-48-276681_chatcmpl-ef076c31-4977-4687-bc83-07bb1f0aa1b2', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 48, 285026), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 48, 278853), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.7', 'stream': True, 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 0, 'output': 98, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>, 'totalCost': 0.000196}}}
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+Creating trace id='litellm-test-a6ce08b7-2364-4efd-b030-7ee3a9ed6996' timestamp=datetime.datetime(2024, 6, 23, 6, 26, 50, 95341, tzinfo=datetime.timezone.utc) name='litellm-acompletion' user_id='langfuse_latency_test_user' input={'messages': [{'role': 'user', 'content': 'redacted-by-litellm'}]} output={'content': 'redacted-by-litellm', 'role': 'assistant'} session_id=None release=None version=None metadata=None tags=[] public=None
+Creating generation trace_id='litellm-test-a6ce08b7-2364-4efd-b030-7ee3a9ed6996' name='litellm-acompletion' start_time=datetime.datetime(2024, 6, 22, 23, 26, 49, 844949) metadata={'litellm_response_cost': 4.1e-05, 'cache_hit': False} input={'messages': [{'role': 'user', 'content': 'redacted-by-litellm'}]} output={'content': 'redacted-by-litellm', 'role': 'assistant'} level=<ObservationLevel.DEFAULT: 'DEFAULT'> status_message=None parent_observation_id=None version=None id='time-23-26-49-844949_chatcmpl-61f43be5-fc8e-4d92-ad89-8080b51f60de' end_time=datetime.datetime(2024, 6, 22, 23, 26, 49, 855530) completion_start_time=datetime.datetime(2024, 6, 22, 23, 26, 49, 846913) model='gpt-3.5-turbo' model_parameters={'temperature': '0.7', 'stream': True, 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=14, output=10, total=None, unit=<ModelUsageUnit.TOKENS: 'TOKENS'>, input_cost=None, output_cost=None, total_cost=4.1e-05) prompt_name=None prompt_version=None...
+item size 454
+item size 925
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+uploading batch of 2 items
+uploading data: {'batch': [{'id': '9bde426a-b7e9-480f-adc2-e1530b572882', 'type': 'trace-create', 'body': {'id': 'litellm-test-a6ce08b7-2364-4efd-b030-7ee3a9ed6996', 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 50, 95341, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'userId': 'langfuse_latency_test_user', 'input': {'messages': [{'role': 'user', 'content': 'redacted-by-litellm'}]}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'tags': []}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 50, 95711, tzinfo=datetime.timezone.utc)}, {'id': '77964887-be69-42b6-b903-8b01d37643ca', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-a6ce08b7-2364-4efd-b030-7ee3a9ed6996', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 6, 22, 23, 26, 49, 844949), 'metadata': {'litellm_response_cost': 4.1e-05, 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'redacted-by-litellm'}]}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-23-26-49-844949_chatcmpl-61f43be5-fc8e-4d92-ad89-8080b51f60de', 'endTime': datetime.datetime(2024, 6, 22, 23, 26, 49, 855530), 'completionStartTime': datetime.datetime(2024, 6, 22, 23, 26, 49, 846913), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.7', 'stream': True, 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 14, 'output': 10, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>, 'totalCost': 4.1e-05}}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 50, 96374, tzinfo=datetime.timezone.utc)}], 'metadata': {'batch_size': 2, 'sdk_integration': 'default', 'sdk_name': 'python', 'sdk_version': '2.32.0', 'public_key': 'pk-lf-b3db7e8e-c2f6-4fc7-825c-a541a8fbe003'}}
+making request: {"batch": [{"id": "9bde426a-b7e9-480f-adc2-e1530b572882", "type": "trace-create", "body": {"id": "litellm-test-a6ce08b7-2364-4efd-b030-7ee3a9ed6996", "timestamp": "2024-06-23T06:26:50.095341Z", "name": "litellm-acompletion", "userId": "langfuse_latency_test_user", "input": {"messages": [{"role": "user", "content": "redacted-by-litellm"}]}, "output": {"content": "redacted-by-litellm", "role": "assistant"}, "tags": []}, "timestamp": "2024-06-23T06:26:50.095711Z"}, {"id": "77964887-be69-42b6-b903-8b01d37643ca", "type": "generation-create", "body": {"traceId": "litellm-test-a6ce08b7-2364-4efd-b030-7ee3a9ed6996", "name": "litellm-acompletion", "startTime": "2024-06-22T23:26:49.844949-07:00", "metadata": {"litellm_response_cost": 4.1e-05, "cache_hit": false}, "input": {"messages": [{"role": "user", "content": "redacted-by-litellm"}]}, "output": {"content": "redacted-by-litellm", "role": "assistant"}, "level": "DEFAULT", "id": "time-23-26-49-844949_chatcmpl-61f43be5-fc8e-4d92-ad89-8080b51f60de", "endTime": "2024-06-22T23:26:49.855530-07:00", "completionStartTime": "2024-06-22T23:26:49.846913-07:00", "model": "gpt-3.5-turbo", "modelParameters": {"temperature": "0.7", "stream": true, "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 14, "output": 10, "unit": "TOKENS", "totalCost": 4.1e-05}}, "timestamp": "2024-06-23T06:26:50.096374Z"}], "metadata": {"batch_size": 2, "sdk_integration": "default", "sdk_name": "python", "sdk_version": "2.32.0", "public_key": "pk-lf-b3db7e8e-c2f6-4fc7-825c-a541a8fbe003"}} to https://us.cloud.langfuse.com/api/public/ingestion
+~0 items in the Langfuse queue
+received response: {"errors":[],"successes":[{"id":"9bde426a-b7e9-480f-adc2-e1530b572882","status":201},{"id":"77964887-be69-42b6-b903-8b01d37643ca","status":201}]}
+successfully uploaded batch of 2 items
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+Getting observations... None, None, None, None, litellm-test-a6ce08b7-2364-4efd-b030-7ee3a9ed6996, None, GENERATION
+~0 items in the Langfuse queue
+consumer is running...
+~0 items in the Langfuse queue
+`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs.
+Creating trace id='litellm-test-b3e968bf-c9cb-4f4d-a834-b0cba57e4695' timestamp=datetime.datetime(2024, 6, 23, 6, 26, 52, 198564, tzinfo=datetime.timezone.utc) name='litellm-acompletion' user_id='langfuse_latency_test_user' input='redacted-by-litellm' output='redacted-by-litellm' session_id=None release=None version=None metadata=None tags=[] public=None
+Creating generation trace_id='litellm-test-b3e968bf-c9cb-4f4d-a834-b0cba57e4695' name='litellm-acompletion' start_time=datetime.datetime(2024, 6, 22, 23, 26, 52, 197638) metadata={'litellm_response_cost': 5.4999999999999995e-05, 'cache_hit': False} input='redacted-by-litellm' output='redacted-by-litellm' level=<ObservationLevel.DEFAULT: 'DEFAULT'> status_message=None parent_observation_id=None version=None id='time-23-26-52-197638_chatcmpl-089072da-028d-4425-ae6d-76e71d21df0d' end_time=datetime.datetime(2024, 6, 22, 23, 26, 52, 198243) completion_start_time=datetime.datetime(2024, 6, 22, 23, 26, 52, 198243) model='gpt-3.5-turbo' model_parameters={'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=10, output=20, total=None, unit=<ModelUsageUnit.TOKENS: 'TOKENS'>, input_cost=None, output_cost=None, total_cost=5.4999999999999995e-05) prompt_name=None prompt_version=None...
+item size 375
 item size 860
+flushing queue
+successfully flushed about 0 items.
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
 uploading batch of 2 items
-uploading data: {'batch': [{'id': 'cab47524-1e1e-4404-b8bd-5f526895ac0c', 'type': 'trace-create', 'body': {'id': 'litellm-test-d9136466-2e87-4afc-8367-dc51764251c7', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 48, 286447, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': 'redacted-by-litellm'}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'tags': ['cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 48, 286752, tzinfo=datetime.timezone.utc)}, {'id': '6bacab4d-822a-430f-85a9-4de1fa7ce259', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-d9136466-2e87-4afc-8367-dc51764251c7', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 48, 276681), 'metadata': {'cache_hit': False}, 'input': {'messages': 'redacted-by-litellm'}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-13-11-48-276681_chatcmpl-ef076c31-4977-4687-bc83-07bb1f0aa1b2', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 48, 285026), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 48, 278853), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.7', 'stream': True, 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 0, 'output': 98, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>, 'totalCost': 0.000196}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 48, 287077, tzinfo=datetime.timezone.utc)}], 'metadata': {'batch_size': 2, 'sdk_integration': 'litellm', 'sdk_name': 'python', 'sdk_version': '2.27.0', 'public_key': 'pk-lf-47ddd17f-c73c-4edd-b92a-b28835843e66'}}
-making request: {"batch": [{"id": "cab47524-1e1e-4404-b8bd-5f526895ac0c", "type": "trace-create", "body": {"id": "litellm-test-d9136466-2e87-4afc-8367-dc51764251c7", "timestamp": "2024-05-07T20:11:48.286447Z", "name": "litellm-acompletion", "input": {"messages": "redacted-by-litellm"}, "output": {"content": "redacted-by-litellm", "role": "assistant"}, "tags": ["cache_hit:False"]}, "timestamp": "2024-05-07T20:11:48.286752Z"}, {"id": "6bacab4d-822a-430f-85a9-4de1fa7ce259", "type": "generation-create", "body": {"traceId": "litellm-test-d9136466-2e87-4afc-8367-dc51764251c7", "name": "litellm-acompletion", "startTime": "2024-05-07T13:11:48.276681-07:00", "metadata": {"cache_hit": false}, "input": {"messages": "redacted-by-litellm"}, "output": {"content": "redacted-by-litellm", "role": "assistant"}, "level": "DEFAULT", "id": "time-13-11-48-276681_chatcmpl-ef076c31-4977-4687-bc83-07bb1f0aa1b2", "endTime": "2024-05-07T13:11:48.285026-07:00", "completionStartTime": "2024-05-07T13:11:48.278853-07:00", "model": "gpt-3.5-turbo", "modelParameters": {"temperature": "0.7", "stream": true, "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 0, "output": 98, "unit": "TOKENS", "totalCost": 0.000196}}, "timestamp": "2024-05-07T20:11:48.287077Z"}], "metadata": {"batch_size": 2, "sdk_integration": "litellm", "sdk_name": "python", "sdk_version": "2.27.0", "public_key": "pk-lf-47ddd17f-c73c-4edd-b92a-b28835843e66"}} to https://cloud.langfuse.com/api/public/ingestion
-received response: {"errors":[],"successes":[{"id":"cab47524-1e1e-4404-b8bd-5f526895ac0c","status":201},{"id":"6bacab4d-822a-430f-85a9-4de1fa7ce259","status":201}]}
+uploading data: {'batch': [{'id': 'a44cc9e3-8b12-4a3f-b8d5-f7a3949ac5c2', 'type': 'trace-create', 'body': {'id': 'litellm-test-b3e968bf-c9cb-4f4d-a834-b0cba57e4695', 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 52, 198564, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'userId': 'langfuse_latency_test_user', 'input': 'redacted-by-litellm', 'output': 'redacted-by-litellm', 'tags': []}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 52, 198832, tzinfo=datetime.timezone.utc)}, {'id': 'fceda986-a5a6-4e87-b7e6-bf208a2f7589', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-b3e968bf-c9cb-4f4d-a834-b0cba57e4695', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 6, 22, 23, 26, 52, 197638), 'metadata': {'litellm_response_cost': 5.4999999999999995e-05, 'cache_hit': False}, 'input': 'redacted-by-litellm', 'output': 'redacted-by-litellm', 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-23-26-52-197638_chatcmpl-089072da-028d-4425-ae6d-76e71d21df0d', 'endTime': datetime.datetime(2024, 6, 22, 23, 26, 52, 198243), 'completionStartTime': datetime.datetime(2024, 6, 22, 23, 26, 52, 198243), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>, 'totalCost': 5.4999999999999995e-05}}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 52, 199379, tzinfo=datetime.timezone.utc)}], 'metadata': {'batch_size': 2, 'sdk_integration': 'default', 'sdk_name': 'python', 'sdk_version': '2.32.0', 'public_key': 'pk-lf-b3db7e8e-c2f6-4fc7-825c-a541a8fbe003'}}
+making request: {"batch": [{"id": "a44cc9e3-8b12-4a3f-b8d5-f7a3949ac5c2", "type": "trace-create", "body": {"id": "litellm-test-b3e968bf-c9cb-4f4d-a834-b0cba57e4695", "timestamp": "2024-06-23T06:26:52.198564Z", "name": "litellm-acompletion", "userId": "langfuse_latency_test_user", "input": "redacted-by-litellm", "output": "redacted-by-litellm", "tags": []}, "timestamp": "2024-06-23T06:26:52.198832Z"}, {"id": "fceda986-a5a6-4e87-b7e6-bf208a2f7589", "type": "generation-create", "body": {"traceId": "litellm-test-b3e968bf-c9cb-4f4d-a834-b0cba57e4695", "name": "litellm-acompletion", "startTime": "2024-06-22T23:26:52.197638-07:00", "metadata": {"litellm_response_cost": 5.4999999999999995e-05, "cache_hit": false}, "input": "redacted-by-litellm", "output": "redacted-by-litellm", "level": "DEFAULT", "id": "time-23-26-52-197638_chatcmpl-089072da-028d-4425-ae6d-76e71d21df0d", "endTime": "2024-06-22T23:26:52.198243-07:00", "completionStartTime": "2024-06-22T23:26:52.198243-07:00", "model": "gpt-3.5-turbo", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS", "totalCost": 5.4999999999999995e-05}}, "timestamp": "2024-06-23T06:26:52.199379Z"}], "metadata": {"batch_size": 2, "sdk_integration": "default", "sdk_name": "python", "sdk_version": "2.32.0", "public_key": "pk-lf-b3db7e8e-c2f6-4fc7-825c-a541a8fbe003"}} to https://us.cloud.langfuse.com/api/public/ingestion
+~0 items in the Langfuse queue
+received response: {"errors":[],"successes":[{"id":"a44cc9e3-8b12-4a3f-b8d5-f7a3949ac5c2","status":201},{"id":"fceda986-a5a6-4e87-b7e6-bf208a2f7589","status":201}]}
 successfully uploaded batch of 2 items
-Getting observations... None, None, None, None, litellm-test-d9136466-2e87-4afc-8367-dc51764251c7, None, GENERATION
-joining 1 consumer threads
-consumer thread 0 joined
-joining 1 consumer threads
-consumer thread 0 joined
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+Getting trace litellm-test-b3e968bf-c9cb-4f4d-a834-b0cba57e4695
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+Getting observations... None, None, None, None, litellm-test-b3e968bf-c9cb-4f4d-a834-b0cba57e4695, None, GENERATION
+~0 items in the Langfuse queue
+`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs.
+flushing queue
+Creating trace id='litellm-test-2a7ed10d-b0aa-41c3-874e-adb2e128a9a6' timestamp=datetime.datetime(2024, 6, 23, 6, 26, 54, 545241, tzinfo=datetime.timezone.utc) name='litellm-acompletion' user_id='langfuse_latency_test_user' input={'messages': [{'role': 'user', 'content': 'This is a test'}]} output={'content': 'This is a test response', 'role': 'assistant'} session_id=None release=None version=None metadata=None tags=[] public=None
+successfully flushed about 0 items.
+Creating generation trace_id='litellm-test-2a7ed10d-b0aa-41c3-874e-adb2e128a9a6' name='litellm-acompletion' start_time=datetime.datetime(2024, 6, 22, 23, 26, 54, 540644) metadata={'litellm_response_cost': 5.4999999999999995e-05, 'cache_hit': False} input={'messages': [{'role': 'user', 'content': 'This is a test'}]} output={'content': 'This is a test response', 'role': 'assistant'} level=<ObservationLevel.DEFAULT: 'DEFAULT'> status_message=None parent_observation_id=None version=None id='time-23-26-54-540644_chatcmpl-5c5777de-9eaf-4515-ad2c-b9a9cf2cfbe5' end_time=datetime.datetime(2024, 6, 22, 23, 26, 54, 543392) completion_start_time=datetime.datetime(2024, 6, 22, 23, 26, 54, 543392) model='gpt-3.5-turbo' model_parameters={'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=10, output=20, total=None, unit=<ModelUsageUnit.TOKENS: 'TOKENS'>, input_cost=None, output_cost=None, total_cost=5.4999999999999995e-05) prompt_name=None prompt_version=None...
+item size 453
+item size 938
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+uploading batch of 2 items
+uploading data: {'batch': [{'id': '696d738d-b46a-418f-be31-049e9add4bd8', 'type': 'trace-create', 'body': {'id': 'litellm-test-2a7ed10d-b0aa-41c3-874e-adb2e128a9a6', 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 54, 545241, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'userId': 'langfuse_latency_test_user', 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': 'This is a test response', 'role': 'assistant'}, 'tags': []}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 54, 545804, tzinfo=datetime.timezone.utc)}, {'id': 'caf378b4-ae86-4a74-a7ac-2f9a83ed9d67', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-2a7ed10d-b0aa-41c3-874e-adb2e128a9a6', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 6, 22, 23, 26, 54, 540644), 'metadata': {'litellm_response_cost': 5.4999999999999995e-05, 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': 'This is a test response', 'role': 'assistant'}, 'level': <ObservationLevel.DEFAULT: 'DEFAULT'>, 'id': 'time-23-26-54-540644_chatcmpl-5c5777de-9eaf-4515-ad2c-b9a9cf2cfbe5', 'endTime': datetime.datetime(2024, 6, 22, 23, 26, 54, 543392), 'completionStartTime': datetime.datetime(2024, 6, 22, 23, 26, 54, 543392), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': <ModelUsageUnit.TOKENS: 'TOKENS'>, 'totalCost': 5.4999999999999995e-05}}, 'timestamp': datetime.datetime(2024, 6, 23, 6, 26, 54, 547005, tzinfo=datetime.timezone.utc)}], 'metadata': {'batch_size': 2, 'sdk_integration': 'default', 'sdk_name': 'python', 'sdk_version': '2.32.0', 'public_key': 'pk-lf-b3db7e8e-c2f6-4fc7-825c-a541a8fbe003'}}
+making request: {"batch": [{"id": "696d738d-b46a-418f-be31-049e9add4bd8", "type": "trace-create", "body": {"id": "litellm-test-2a7ed10d-b0aa-41c3-874e-adb2e128a9a6", "timestamp": "2024-06-23T06:26:54.545241Z", "name": "litellm-acompletion", "userId": "langfuse_latency_test_user", "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "This is a test response", "role": "assistant"}, "tags": []}, "timestamp": "2024-06-23T06:26:54.545804Z"}, {"id": "caf378b4-ae86-4a74-a7ac-2f9a83ed9d67", "type": "generation-create", "body": {"traceId": "litellm-test-2a7ed10d-b0aa-41c3-874e-adb2e128a9a6", "name": "litellm-acompletion", "startTime": "2024-06-22T23:26:54.540644-07:00", "metadata": {"litellm_response_cost": 5.4999999999999995e-05, "cache_hit": false}, "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "This is a test response", "role": "assistant"}, "level": "DEFAULT", "id": "time-23-26-54-540644_chatcmpl-5c5777de-9eaf-4515-ad2c-b9a9cf2cfbe5", "endTime": "2024-06-22T23:26:54.543392-07:00", "completionStartTime": "2024-06-22T23:26:54.543392-07:00", "model": "gpt-3.5-turbo", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS", "totalCost": 5.4999999999999995e-05}}, "timestamp": "2024-06-23T06:26:54.547005Z"}], "metadata": {"batch_size": 2, "sdk_integration": "default", "sdk_name": "python", "sdk_version": "2.32.0", "public_key": "pk-lf-b3db7e8e-c2f6-4fc7-825c-a541a8fbe003"}} to https://us.cloud.langfuse.com/api/public/ingestion
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+received response: {"errors":[],"successes":[{"id":"696d738d-b46a-418f-be31-049e9add4bd8","status":201},{"id":"caf378b4-ae86-4a74-a7ac-2f9a83ed9d67","status":201}]}
+successfully uploaded batch of 2 items
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+Getting trace litellm-test-2a7ed10d-b0aa-41c3-874e-adb2e128a9a6
+~0 items in the Langfuse queue
+Getting observations... None, None, None, None, litellm-test-2a7ed10d-b0aa-41c3-874e-adb2e128a9a6, None, GENERATION
+~0 items in the Langfuse queue
 joining 1 consumer threads
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
 consumer thread 0 joined
 joining 1 consumer threads
+~0 items in the Langfuse queue
+~0 items in the Langfuse queue
 consumer thread 0 joined
 joining 1 consumer threads
+~0 items in the Langfuse queue
 consumer thread 0 joined
 joining 1 consumer threads
+~0 items in the Langfuse queue
 consumer thread 0 joined

From b577f65798b210936645ec66ad5a7d451861f6b0 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 22 Jun 2024 23:53:18 -0700
Subject: [PATCH 030/150] test(test_proxy_server_langfuse.py): cleanup tests
 causing OOM issues.

---
 litellm/tests/test_proxy_server_langfuse.py | 26 ++++++++++++++-------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/litellm/tests/test_proxy_server_langfuse.py b/litellm/tests/test_proxy_server_langfuse.py
index 4f896f792..abd4d2788 100644
--- a/litellm/tests/test_proxy_server_langfuse.py
+++ b/litellm/tests/test_proxy_server_langfuse.py
@@ -1,19 +1,24 @@
-import sys, os
+import os
+import sys
 import traceback
+
 from dotenv import load_dotenv
 
 load_dotenv()
-import os, io
+import io
+import os
 
 # this file is to test litellm/proxy
 
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-import pytest, logging
+import logging
+
+import pytest
+
 import litellm
-from litellm import embedding, completion, completion_cost, Timeout
-from litellm import RateLimitError
+from litellm import RateLimitError, Timeout, completion, completion_cost, embedding
 
 # Configure logging
 logging.basicConfig(
@@ -21,14 +26,16 @@ logging.basicConfig(
     format="%(asctime)s - %(levelname)s - %(message)s",
 )
 
+from fastapi import FastAPI
+
 # test /chat/completion request to the proxy
 from fastapi.testclient import TestClient
-from fastapi import FastAPI
-from litellm.proxy.proxy_server import (
+
+from litellm.proxy.proxy_server import (  # Replace with the actual module where your FastAPI router is defined
     router,
     save_worker_config,
     startup_event,
-)  # Replace with the actual module where your FastAPI router is defined
+)
 
 filepath = os.path.dirname(os.path.abspath(__file__))
 config_fp = f"{filepath}/test_configs/test_config.yaml"
@@ -67,6 +74,9 @@ def client():
         yield client
 
 
+@pytest.mark.skip(
+    reason="Init multiple Langfuse clients causing OOM issues. Reduce init clients on ci/cd. "
+)
 def test_chat_completion(client):
     try:
         # Your test data

From d2fe2b30a9debdea3d421e726ba2b35aa77b8290 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sun, 23 Jun 2024 00:06:15 -0700
Subject: [PATCH 031/150] test(test_completion.py): handle replicate api error

---
 litellm/tests/test_completion.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 31ac792d8..830b3acd3 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -2580,6 +2580,8 @@ async def test_completion_replicate_llama3(sync_mode):
         # Add any assertions here to check the response
         assert isinstance(response, litellm.ModelResponse)
         response_format_tests(response=response)
+    except litellm.APIError as e:
+        pass
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 

From af21695f2b856b4c0e50f213711fcf12b3f8168b Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sun, 23 Jun 2024 00:30:45 -0700
Subject: [PATCH 032/150] test: skip unstable tests

---
 litellm/tests/test_dynamic_rate_limit_handler.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/litellm/tests/test_dynamic_rate_limit_handler.py b/litellm/tests/test_dynamic_rate_limit_handler.py
index 6e1b55d18..4f49abff8 100644
--- a/litellm/tests/test_dynamic_rate_limit_handler.py
+++ b/litellm/tests/test_dynamic_rate_limit_handler.py
@@ -296,6 +296,9 @@ async def test_update_cache(
     assert active_projects == 1
 
 
+@pytest.mark.skip(
+    reason="Unstable on ci/cd due to curr minute changes. Refactor to handle minute changing"
+)
 @pytest.mark.parametrize("num_projects", [2])
 @pytest.mark.asyncio
 async def test_multiple_projects(
@@ -350,8 +353,10 @@ async def test_multiple_projects(
     prev_availability: Optional[int] = None
 
     print("expected_runs: {}".format(expected_runs))
+
     for i in range(expected_runs + 1):
         # check availability
+
         availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
             model=model
         )
@@ -390,6 +395,9 @@ async def test_multiple_projects(
     assert availability == 0
 
 
+@pytest.mark.skip(
+    reason="Unstable on ci/cd due to curr minute changes. Refactor to handle minute changing"
+)
 @pytest.mark.parametrize("num_projects", [2])
 @pytest.mark.asyncio
 async def test_multiple_projects_e2e(

From 4e84147593061fb85c4b10b1873d6216483ed152 Mon Sep 17 00:00:00 2001
From: 7HR4IZ3 <90985774+7HR4IZ3@users.noreply.github.com>
Date: Mon, 24 Jun 2024 14:09:40 +0100
Subject: [PATCH 033/150] fix: Lunary integration

Fixes the bug of litellm not logging system messages to lunary
---
 litellm/integrations/lunary.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/litellm/integrations/lunary.py b/litellm/integrations/lunary.py
index f9b2f25e7..b0cc069c4 100644
--- a/litellm/integrations/lunary.py
+++ b/litellm/integrations/lunary.py
@@ -108,6 +108,7 @@ class LunaryLogger:
         try:
             print_verbose(f"Lunary Logging - Logging request for model {model}")
 
+            template_id = None
             litellm_params = kwargs.get("litellm_params", {})
             optional_params = kwargs.get("optional_params", {})
             metadata = litellm_params.get("metadata", {}) or {}

From b2302bf224e992f515c6c4703b2eafa6508dc74a Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 07:54:08 -0700
Subject: [PATCH 034/150] fix ui login bug

---
 litellm/proxy/proxy_server.py | 52 ++++++++++++++---------------------
 1 file changed, 20 insertions(+), 32 deletions(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 30b90abe6..a702cecbd 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -7508,12 +7508,6 @@ async def login(request: Request):
             litellm_dashboard_ui += "/ui/"
         import jwt
 
-        if litellm_master_key_hash is None:
-            raise HTTPException(
-                status_code=500,
-                detail={"error": "No master key set, please set LITELLM_MASTER_KEY"},
-            )
-
         jwt_token = jwt.encode(
             {
                 "user_id": user_id,
@@ -7523,7 +7517,7 @@ async def login(request: Request):
                 "login_method": "username_password",
                 "premium_user": premium_user,
             },
-            litellm_master_key_hash,
+            master_key,
             algorithm="HS256",
         )
         litellm_dashboard_ui += "?userID=" + user_id
@@ -7578,14 +7572,6 @@ async def login(request: Request):
                 litellm_dashboard_ui += "/ui/"
             import jwt
 
-            if litellm_master_key_hash is None:
-                raise HTTPException(
-                    status_code=500,
-                    detail={
-                        "error": "No master key set, please set LITELLM_MASTER_KEY"
-                    },
-                )
-
             jwt_token = jwt.encode(
                 {
                     "user_id": user_id,
@@ -7595,7 +7581,7 @@ async def login(request: Request):
                     "login_method": "username_password",
                     "premium_user": premium_user,
                 },
-                litellm_master_key_hash,
+                master_key,
                 algorithm="HS256",
             )
             litellm_dashboard_ui += "?userID=" + user_id
@@ -7642,7 +7628,14 @@ async def onboarding(invite_link: str):
     - Get user from db
     - Pass in user_email if set
     """
-    global prisma_client
+    global prisma_client, master_key
+    if master_key is None:
+        raise ProxyException(
+            message="Master Key not set for Proxy. Please set Master Key to use Admin UI. Set `LITELLM_MASTER_KEY` in .env or set general_settings:master_key in config.yaml.  https://docs.litellm.ai/docs/proxy/virtual_keys. If set, use `--detailed_debug` to debug issue.",
+            type="auth_error",
+            param="master_key",
+            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+        )
     ### VALIDATE INVITE LINK ###
     if prisma_client is None:
         raise HTTPException(
@@ -7714,12 +7707,6 @@ async def onboarding(invite_link: str):
         litellm_dashboard_ui += "/ui/onboarding"
     import jwt
 
-    if litellm_master_key_hash is None:
-        raise HTTPException(
-            status_code=500,
-            detail={"error": "No master key set, please set LITELLM_MASTER_KEY"},
-        )
-
     jwt_token = jwt.encode(
         {
             "user_id": user_obj.user_id,
@@ -7729,7 +7716,7 @@ async def onboarding(invite_link: str):
             "login_method": "username_password",
             "premium_user": premium_user,
         },
-        litellm_master_key_hash,
+        master_key,
         algorithm="HS256",
     )
 
@@ -7862,11 +7849,18 @@ def get_image():
 @app.get("/sso/callback", tags=["experimental"], include_in_schema=False)
 async def auth_callback(request: Request):
     """Verify login"""
-    global general_settings, ui_access_mode, premium_user
+    global general_settings, ui_access_mode, premium_user, master_key
     microsoft_client_id = os.getenv("MICROSOFT_CLIENT_ID", None)
     google_client_id = os.getenv("GOOGLE_CLIENT_ID", None)
     generic_client_id = os.getenv("GENERIC_CLIENT_ID", None)
     # get url from request
+    if master_key is None:
+        raise ProxyException(
+            message="Master Key not set for Proxy. Please set Master Key to use Admin UI. Set `LITELLM_MASTER_KEY` in .env or set general_settings:master_key in config.yaml.  https://docs.litellm.ai/docs/proxy/virtual_keys. If set, use `--detailed_debug` to debug issue.",
+            type="auth_error",
+            param="master_key",
+            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+        )
     redirect_url = os.getenv("PROXY_BASE_URL", str(request.base_url))
     if redirect_url.endswith("/"):
         redirect_url += "sso/callback"
@@ -8140,12 +8134,6 @@ async def auth_callback(request: Request):
 
     import jwt
 
-    if litellm_master_key_hash is None:
-        raise HTTPException(
-            status_code=500,
-            detail={"error": "No master key set, please set LITELLM_MASTER_KEY"},
-        )
-
     jwt_token = jwt.encode(
         {
             "user_id": user_id,
@@ -8155,7 +8143,7 @@ async def auth_callback(request: Request):
             "login_method": "sso",
             "premium_user": premium_user,
         },
-        litellm_master_key_hash,
+        master_key,
         algorithm="HS256",
     )
     litellm_dashboard_ui += "?userID=" + user_id

From 6ac0f20099295f03cf2640b73ed26cbf4e000274 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 13:21:16 -0700
Subject: [PATCH 035/150] docs - update telemetry

---
 docs/my-website/docs/observability/telemetry.md | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/docs/my-website/docs/observability/telemetry.md b/docs/my-website/docs/observability/telemetry.md
index 78267b9c5..232295566 100644
--- a/docs/my-website/docs/observability/telemetry.md
+++ b/docs/my-website/docs/observability/telemetry.md
@@ -1,13 +1,8 @@
 # Telemetry 
 
-LiteLLM contains a telemetry feature that tells us what models are used, and what errors are hit.
+There is no Telemetry on LiteLLM - no data is stored by us
 
 ## What is logged? 
 
-Only the model name and exception raised is logged. 
+NOTHING - no data is sent to LiteLLM Servers
 
-## Why?
-We use this information to help us understand how LiteLLM is used, and improve stability. 
-
-## Opting out
-If you prefer to opt out of telemetry, you can do this by setting `litellm.telemetry = False`. 
\ No newline at end of file

From 247d71db7ada8d2a479298048abee81eaf704e66 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon, 24 Jun 2024 16:55:19 -0700
Subject: [PATCH 036/150] fix(utils.py): fix exception_mapping check for errors

If exception already mapped - don't attach traceback to it
---
 litellm/exceptions.py | 16 +++++-----------
 litellm/utils.py      |  4 ++++
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/litellm/exceptions.py b/litellm/exceptions.py
index 9674d48b1..98b519278 100644
--- a/litellm/exceptions.py
+++ b/litellm/exceptions.py
@@ -9,10 +9,11 @@
 
 ## LiteLLM versions of the OpenAI Exception Types
 
-import openai
-import httpx
 from typing import Optional
 
+import httpx
+import openai
+
 
 class AuthenticationError(openai.AuthenticationError):  # type: ignore
     def __init__(
@@ -658,15 +659,8 @@ class APIResponseValidationError(openai.APIResponseValidationError):  # type: ig
 
 
 class OpenAIError(openai.OpenAIError):  # type: ignore
-    def __init__(self, original_exception):
-        self.status_code = original_exception.http_status
-        super().__init__(
-            http_body=original_exception.http_body,
-            http_status=original_exception.http_status,
-            json_body=original_exception.json_body,
-            headers=original_exception.headers,
-            code=original_exception.code,
-        )
+    def __init__(self, original_exception=None):
+        super().__init__()
         self.llm_provider = "openai"
 
 
diff --git a/litellm/utils.py b/litellm/utils.py
index 0849ba3a2..ce66d0fbb 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -5914,6 +5914,7 @@ def exception_type(
                         )
                 else:
                     # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
+                    # exception_mapping_worked = True
                     raise APIConnectionError(
                         message=f"APIConnectionError: {exception_provider} - {message}",
                         llm_provider=custom_llm_provider,
@@ -7460,6 +7461,9 @@ def exception_type(
         if exception_mapping_worked:
             raise e
         else:
+            for error_type in litellm.LITELLM_EXCEPTION_TYPES:
+                if isinstance(e, error_type):
+                    raise e  # it's already mapped
             raise APIConnectionError(
                 message="{}\n{}".format(original_exception, traceback.format_exc()),
                 llm_provider="",

From 2e588c06857e12ecc22af7f5917eb77864668f8d Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon, 24 Jun 2024 17:25:26 -0700
Subject: [PATCH 037/150] fix(router.py): use user-defined model_input_tokens
 for pre-call filter checks

---
 litellm/proxy/_new_secret_config.yaml | 16 ++++++++--
 litellm/router.py                     | 42 +++++++++++++++++++++++++--
 litellm/tests/test_router.py          |  5 ++++
 3 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 640a3b2cf..78d7dc70c 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -4,7 +4,17 @@ model_list:
       model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
       api_key: my-fake-key
       aws_bedrock_runtime_endpoint: http://127.0.0.1:8000
+      mock_response: "Hello world 1"
+    model_info: 
+      max_input_tokens: 0 # trigger context window fallback
+  - model_name: my-fake-model
+    litellm_params:
+      model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
+      api_key: my-fake-key
+      aws_bedrock_runtime_endpoint: http://127.0.0.1:8000
+      mock_response: "Hello world 2"
+    model_info: 
+      max_input_tokens: 0
 
-litellm_settings:
-  success_callback: ["langfuse"]
-  failure_callback: ["langfuse"]
+router_settings:
+  enable_pre_call_checks: True
diff --git a/litellm/router.py b/litellm/router.py
index e9b0cc00a..6163da487 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -404,6 +404,7 @@ class Router:
             litellm.failure_callback = [self.deployment_callback_on_failure]
         print(  # noqa
             f"Intialized router with Routing strategy: {self.routing_strategy}\n\n"
+            f"Routing enable_pre_call_checks: {self.enable_pre_call_checks}\n\n"
             f"Routing fallbacks: {self.fallbacks}\n\n"
             f"Routing content fallbacks: {self.content_policy_fallbacks}\n\n"
             f"Routing context window fallbacks: {self.context_window_fallbacks}\n\n"
@@ -3915,9 +3916,38 @@ class Router:
                         raise Exception("Model invalid format - {}".format(type(model)))
         return None
 
+    def get_router_model_info(self, deployment: dict) -> ModelMapInfo:
+        """
+        For a given model id, return the model info (max tokens, input cost, output cost, etc.).
+
+        Augment litellm info with additional params set in `model_info`.
+
+        Returns
+        - ModelInfo - If found -> typed dict with max tokens, input cost, etc.
+        """
+        ## SET MODEL NAME
+        base_model = deployment.get("model_info", {}).get("base_model", None)
+        if base_model is None:
+            base_model = deployment.get("litellm_params", {}).get("base_model", None)
+        model = base_model or deployment.get("litellm_params", {}).get("model", None)
+
+        ## GET LITELLM MODEL INFO
+        model_info = litellm.get_model_info(model=model)
+
+        ## CHECK USER SET MODEL INFO
+        user_model_info = deployment.get("model_info", {})
+
+        model_info.update(user_model_info)
+
+        return model_info
+
     def get_model_info(self, id: str) -> Optional[dict]:
         """
         For a given model id, return the model info
+
+        Returns
+        - dict: the model in list with 'model_name', 'litellm_params', Optional['model_info']
+        - None: could not find deployment in list
         """
         for model in self.model_list:
             if "model_info" in model and "id" in model["model_info"]:
@@ -4307,6 +4337,7 @@ class Router:
             return _returned_deployments
 
         _context_window_error = False
+        _potential_error_str = ""
         _rate_limit_error = False
 
         ## get model group RPM ##
@@ -4327,7 +4358,7 @@ class Router:
                 model = base_model or deployment.get("litellm_params", {}).get(
                     "model", None
                 )
-                model_info = litellm.get_model_info(model=model)
+                model_info = self.get_router_model_info(deployment=deployment)
 
                 if (
                     isinstance(model_info, dict)
@@ -4339,6 +4370,11 @@ class Router:
                     ):
                         invalid_model_indices.append(idx)
                         _context_window_error = True
+                        _potential_error_str += (
+                            "Model={}, Max Input Tokens={}, Got={}".format(
+                                model, model_info["max_input_tokens"], input_tokens
+                            )
+                        )
                         continue
             except Exception as e:
                 verbose_router_logger.debug("An error occurs - {}".format(str(e)))
@@ -4440,7 +4476,9 @@ class Router:
                 )
             elif _context_window_error == True:
                 raise litellm.ContextWindowExceededError(
-                    message="Context Window exceeded for given call",
+                    message="litellm._pre_call_checks: Context Window exceeded for given call. No models have context window large enough for this call.\n{}".format(
+                        _potential_error_str
+                    ),
                     model=model,
                     llm_provider="",
                     response=httpx.Response(
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index 2e8814327..84ea9e1c9 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -755,6 +755,7 @@ def test_router_context_window_check_pre_call_check_in_group():
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
                     "base_model": "azure/gpt-35-turbo",
+                    "mock_response": "Hello world 1!",
                 },
             },
             {
@@ -762,6 +763,7 @@ def test_router_context_window_check_pre_call_check_in_group():
                 "litellm_params": {  # params for litellm completion/embedding call
                     "model": "gpt-3.5-turbo-1106",
                     "api_key": os.getenv("OPENAI_API_KEY"),
+                    "mock_response": "Hello world 2!",
                 },
             },
         ]
@@ -777,6 +779,9 @@ def test_router_context_window_check_pre_call_check_in_group():
         )
 
         print(f"response: {response}")
+
+        assert response.choices[0].message.content == "Hello world 2!"
+        assert False
     except Exception as e:
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
 

From 82c6f3109525e274f2b26f756cd66a00c1e71089 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon, 24 Jun 2024 17:28:12 -0700
Subject: [PATCH 038/150] test(test_router.py): add testing

---
 litellm/tests/test_router.py | 57 ++++++++++++++++++++++++++++++++++--
 1 file changed, 55 insertions(+), 2 deletions(-)

diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index 84ea9e1c9..3237c8084 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -732,7 +732,61 @@ def test_router_rpm_pre_call_check():
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
 
 
-def test_router_context_window_check_pre_call_check_in_group():
+def test_router_context_window_check_pre_call_check_in_group_custom_model_info():
+    """
+    - Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
+    - Send a 5k prompt
+    - Assert it works
+    """
+    import os
+
+    from large_text import text
+
+    litellm.set_verbose = False
+
+    print(f"len(text): {len(text)}")
+    try:
+        model_list = [
+            {
+                "model_name": "gpt-3.5-turbo",  # openai model name
+                "litellm_params": {  # params for litellm completion/embedding call
+                    "model": "azure/chatgpt-v-2",
+                    "api_key": os.getenv("AZURE_API_KEY"),
+                    "api_version": os.getenv("AZURE_API_VERSION"),
+                    "api_base": os.getenv("AZURE_API_BASE"),
+                    "base_model": "azure/gpt-35-turbo",
+                    "mock_response": "Hello world 1!",
+                },
+                "model_info": {"max_input_tokens": 100},
+            },
+            {
+                "model_name": "gpt-3.5-turbo",  # openai model name
+                "litellm_params": {  # params for litellm completion/embedding call
+                    "model": "gpt-3.5-turbo-1106",
+                    "api_key": os.getenv("OPENAI_API_KEY"),
+                    "mock_response": "Hello world 2!",
+                },
+                "model_info": {"max_input_tokens": 0},
+            },
+        ]
+
+        router = Router(model_list=model_list, set_verbose=True, enable_pre_call_checks=True, num_retries=0)  # type: ignore
+
+        response = router.completion(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "user", "content": "Who was Alexander?"},
+            ],
+        )
+
+        print(f"response: {response}")
+
+        assert response.choices[0].message.content == "Hello world 1!"
+    except Exception as e:
+        pytest.fail(f"Got unexpected exception on router! - {str(e)}")
+
+
+def test_router_context_window_check_pre_call_check():
     """
     - Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
     - Send a 5k prompt
@@ -781,7 +835,6 @@ def test_router_context_window_check_pre_call_check_in_group():
         print(f"response: {response}")
 
         assert response.choices[0].message.content == "Hello world 2!"
-        assert False
     except Exception as e:
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
 

From 438f65666bee9ca575a97f29fa4d97f4769c9237 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon, 24 Jun 2024 19:41:29 -0700
Subject: [PATCH 039/150] fix(utils.py): catch 422-status errors

---
 litellm/llms/replicate.py | 27 ++++++++++++++++++++-------
 litellm/utils.py          |  8 ++++++++
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py
index ce62e51e9..56549cfd4 100644
--- a/litellm/llms/replicate.py
+++ b/litellm/llms/replicate.py
@@ -1,13 +1,18 @@
-import os, types
+import asyncio
 import json
-import requests  # type: ignore
+import os
 import time
-from typing import Callable, Optional, Union, Tuple, Any
-from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
-import litellm, asyncio
+import types
+from typing import Any, Callable, Optional, Tuple, Union
+
 import httpx  # type: ignore
-from .prompt_templates.factory import prompt_factory, custom_prompt
+import requests  # type: ignore
+
+import litellm
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
+
+from .prompt_templates.factory import custom_prompt, prompt_factory
 
 
 class ReplicateError(Exception):
@@ -329,7 +334,15 @@ async def async_handle_prediction_response_streaming(
             response_data = response.json()
             status = response_data["status"]
             if "output" in response_data:
-                output_string = "".join(response_data["output"])
+                try:
+                    output_string = "".join(response_data["output"])
+                except Exception as e:
+                    raise ReplicateError(
+                        status_code=422,
+                        message="Unable to parse response. Got={}".format(
+                            response_data["output"]
+                        ),
+                    )
                 new_output = output_string[len(previous_output) :]
                 print_verbose(f"New chunk: {new_output}")
                 yield {"output": new_output, "status": status}
diff --git a/litellm/utils.py b/litellm/utils.py
index ce66d0fbb..1bc8bf771 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -6068,6 +6068,14 @@ def exception_type(
                             model=model,
                             llm_provider="replicate",
                         )
+                    elif original_exception.status_code == 422:
+                        exception_mapping_worked = True
+                        raise UnprocessableEntityError(
+                            message=f"ReplicateException - {original_exception.message}",
+                            llm_provider="replicate",
+                            model=model,
+                            response=original_exception.response,
+                        )
                     elif original_exception.status_code == 429:
                         exception_mapping_worked = True
                         raise RateLimitError(

From 6f2f89d7b28ffa1cd96fb3b4a5d5e09631d6a4c8 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon, 24 Jun 2024 19:13:56 -0700
Subject: [PATCH 040/150] fix(vertex_httpx.py): cover gemini content violation
 (on prompt)

---
 litellm/llms/vertex_httpx.py            | 87 +++++++++++++++++++++----
 litellm/proxy/_super_secret_config.yaml |  3 +
 litellm/types/llms/vertex_ai.py         |  6 +-
 3 files changed, 79 insertions(+), 17 deletions(-)

diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py
index 63bcd9f4f..028c3f721 100644
--- a/litellm/llms/vertex_httpx.py
+++ b/litellm/llms/vertex_httpx.py
@@ -563,6 +563,43 @@ class VertexLLM(BaseLLM):
             )
 
         ## CHECK IF RESPONSE FLAGGED
+        if "promptFeedback" in completion_response:
+            if "blockReason" in completion_response["promptFeedback"]:
+                # If set, the prompt was blocked and no candidates are returned. Rephrase your prompt
+                model_response.choices[0].finish_reason = "content_filter"
+
+                chat_completion_message: ChatCompletionResponseMessage = {
+                    "role": "assistant",
+                    "content": None,
+                }
+
+                choice = litellm.Choices(
+                    finish_reason="content_filter",
+                    index=0,
+                    message=chat_completion_message,  # type: ignore
+                    logprobs=None,
+                    enhancements=None,
+                )
+
+                model_response.choices = [choice]
+
+                ## GET USAGE ##
+                usage = litellm.Usage(
+                    prompt_tokens=completion_response["usageMetadata"][
+                        "promptTokenCount"
+                    ],
+                    completion_tokens=completion_response["usageMetadata"].get(
+                        "candidatesTokenCount", 0
+                    ),
+                    total_tokens=completion_response["usageMetadata"][
+                        "totalTokenCount"
+                    ],
+                )
+
+                setattr(model_response, "usage", usage)
+
+                return model_response
+
         if len(completion_response["candidates"]) > 0:
             content_policy_violations = (
                 VertexGeminiConfig().get_flagged_finish_reasons()
@@ -573,16 +610,40 @@ class VertexLLM(BaseLLM):
                 in content_policy_violations.keys()
             ):
                 ## CONTENT POLICY VIOLATION ERROR
-                raise VertexAIError(
-                    status_code=400,
-                    message="The response was blocked. Reason={}. Raw Response={}".format(
-                        content_policy_violations[
-                            completion_response["candidates"][0]["finishReason"]
-                        ],
-                        completion_response,
-                    ),
+                model_response.choices[0].finish_reason = "content_filter"
+
+                chat_completion_message = {
+                    "role": "assistant",
+                    "content": None,
+                }
+
+                choice = litellm.Choices(
+                    finish_reason="content_filter",
+                    index=0,
+                    message=chat_completion_message,  # type: ignore
+                    logprobs=None,
+                    enhancements=None,
                 )
 
+                model_response.choices = [choice]
+
+                ## GET USAGE ##
+                usage = litellm.Usage(
+                    prompt_tokens=completion_response["usageMetadata"][
+                        "promptTokenCount"
+                    ],
+                    completion_tokens=completion_response["usageMetadata"].get(
+                        "candidatesTokenCount", 0
+                    ),
+                    total_tokens=completion_response["usageMetadata"][
+                        "totalTokenCount"
+                    ],
+                )
+
+                setattr(model_response, "usage", usage)
+
+                return model_response
+
         model_response.choices = []  # type: ignore
 
         ## GET MODEL ##
@@ -590,9 +651,7 @@ class VertexLLM(BaseLLM):
 
         try:
             ## GET TEXT ##
-            chat_completion_message: ChatCompletionResponseMessage = {
-                "role": "assistant"
-            }
+            chat_completion_message = {"role": "assistant"}
             content_str = ""
             tools: List[ChatCompletionToolCallChunk] = []
             for idx, candidate in enumerate(completion_response["candidates"]):
@@ -632,9 +691,9 @@ class VertexLLM(BaseLLM):
             ## GET USAGE ##
             usage = litellm.Usage(
                 prompt_tokens=completion_response["usageMetadata"]["promptTokenCount"],
-                completion_tokens=completion_response["usageMetadata"][
-                    "candidatesTokenCount"
-                ],
+                completion_tokens=completion_response["usageMetadata"].get(
+                    "candidatesTokenCount", 0
+                ),
                 total_tokens=completion_response["usageMetadata"]["totalTokenCount"],
             )
 
diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml
index 04a4806c1..c5f1b4768 100644
--- a/litellm/proxy/_super_secret_config.yaml
+++ b/litellm/proxy/_super_secret_config.yaml
@@ -1,4 +1,7 @@
 model_list:
+- model_name: gemini-1.5-flash-gemini
+  litellm_params:
+    model: gemini/gemini-1.5-flash
 - litellm_params:
     api_base: http://0.0.0.0:8080
     api_key: ''
diff --git a/litellm/types/llms/vertex_ai.py b/litellm/types/llms/vertex_ai.py
index 1612f8761..2dda57c2e 100644
--- a/litellm/types/llms/vertex_ai.py
+++ b/litellm/types/llms/vertex_ai.py
@@ -227,9 +227,9 @@ class PromptFeedback(TypedDict):
     blockReasonMessage: str
 
 
-class UsageMetadata(TypedDict):
-    promptTokenCount: int
-    totalTokenCount: int
+class UsageMetadata(TypedDict, total=False):
+    promptTokenCount: Required[int]
+    totalTokenCount: Required[int]
     candidatesTokenCount: int
 
 

From a087596be01d42ff5182ee8387debb90e00acafa Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon, 24 Jun 2024 19:22:20 -0700
Subject: [PATCH 041/150] fix(vertex_httpx.py): Return empty model response for
 content filter violations

---
 litellm/llms/vertex_httpx.py                  |  6 +--
 .../tests/test_amazing_vertex_completion.py   | 41 ++++++++++++++-----
 2 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py
index 028c3f721..856b05f61 100644
--- a/litellm/llms/vertex_httpx.py
+++ b/litellm/llms/vertex_httpx.py
@@ -562,6 +562,9 @@ class VertexLLM(BaseLLM):
                 status_code=422,
             )
 
+        ## GET MODEL ##
+        model_response.model = model
+
         ## CHECK IF RESPONSE FLAGGED
         if "promptFeedback" in completion_response:
             if "blockReason" in completion_response["promptFeedback"]:
@@ -646,9 +649,6 @@ class VertexLLM(BaseLLM):
 
         model_response.choices = []  # type: ignore
 
-        ## GET MODEL ##
-        model_response.model = model
-
         try:
             ## GET TEXT ##
             chat_completion_message = {"role": "assistant"}
diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py
index fb2891249..c9e5501a8 100644
--- a/litellm/tests/test_amazing_vertex_completion.py
+++ b/litellm/tests/test_amazing_vertex_completion.py
@@ -696,6 +696,18 @@ async def test_gemini_pro_function_calling_httpx(provider, sync_mode):
             pytest.fail("An unexpected exception occurred - {}".format(str(e)))
 
 
+def vertex_httpx_mock_reject_prompt_post(*args, **kwargs):
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.headers = {"Content-Type": "application/json"}
+    mock_response.json.return_value = {
+        "promptFeedback": {"blockReason": "OTHER"},
+        "usageMetadata": {"promptTokenCount": 6285, "totalTokenCount": 6285},
+    }
+
+    return mock_response
+
+
 # @pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
 def vertex_httpx_mock_post(url, data=None, json=None, headers=None):
     mock_response = MagicMock()
@@ -817,8 +829,11 @@ def vertex_httpx_mock_post(url, data=None, json=None, headers=None):
 
 
 @pytest.mark.parametrize("provider", ["vertex_ai_beta"])  # "vertex_ai",
+@pytest.mark.parametrize("content_filter_type", ["prompt", "response"])  # "vertex_ai",
 @pytest.mark.asyncio
-async def test_gemini_pro_json_schema_httpx_content_policy_error(provider):
+async def test_gemini_pro_json_schema_httpx_content_policy_error(
+    provider, content_filter_type
+):
     load_vertex_ai_credentials()
     litellm.set_verbose = True
     messages = [
@@ -839,16 +854,20 @@ Using this JSON schema:
 
     client = HTTPHandler()
 
-    with patch.object(client, "post", side_effect=vertex_httpx_mock_post) as mock_call:
-        try:
-            response = completion(
-                model="vertex_ai_beta/gemini-1.5-flash",
-                messages=messages,
-                response_format={"type": "json_object"},
-                client=client,
-            )
-        except litellm.ContentPolicyViolationError as e:
-            pass
+    if content_filter_type == "prompt":
+        _side_effect = vertex_httpx_mock_reject_prompt_post
+    else:
+        _side_effect = vertex_httpx_mock_post
+
+    with patch.object(client, "post", side_effect=_side_effect) as mock_call:
+        response = completion(
+            model="vertex_ai_beta/gemini-1.5-flash",
+            messages=messages,
+            response_format={"type": "json_object"},
+            client=client,
+        )
+
+        assert response.choices[0].finish_reason == "content_filter"
 
         mock_call.assert_called_once()
 

From e30410f7001eaabebcce9613d43e7e5eb1f44170 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 17:45:13 -0700
Subject: [PATCH 042/150] feat - allow user to define public routes

---
 litellm/proxy/auth/user_api_key_auth.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py
index 3d14f5300..f6e3a0dfe 100644
--- a/litellm/proxy/auth/user_api_key_auth.py
+++ b/litellm/proxy/auth/user_api_key_auth.py
@@ -137,7 +137,9 @@ async def user_api_key_auth(
         """
         route: str = request.url.path
 
-        if route in LiteLLMRoutes.public_routes.value:
+        if route in LiteLLMRoutes.public_routes.value or route in general_settings.get(
+            "public_routes", []
+        ):
             # check if public endpoint
             return UserAPIKeyAuth(user_role=LitellmUserRoles.INTERNAL_USER_VIEW_ONLY)
 

From bfae8c3da696624b9edd72bf7ed102dec5b5713f Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 17:46:38 -0700
Subject: [PATCH 043/150] example config with public routes

---
 litellm/proxy/proxy_config.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index d5190455f..8898dd8cb 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -21,6 +21,9 @@ model_list:
 general_settings:
   master_key: sk-1234
   alerting: ["slack", "email"]
+  public_routes: [
+    "/spend/calculate",
+  ]
 
 litellm_settings:
   success_callback: ["prometheus"]

From 2cb4d845aec54824d436348b673f907c99b6c644 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 19:05:34 -0700
Subject: [PATCH 044/150] feat - refactor /spend/calculate

---
 litellm/proxy/_types.py                       |  6 ++
 .../spend_management_endpoints.py             | 75 +++++++++++++++++--
 2 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 0883763d1..640c7695a 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -1627,3 +1627,9 @@ class CommonProxyErrors(enum.Enum):
     no_llm_router = "No models configured on proxy"
     not_allowed_access = "Admin-only endpoint. Not allowed to access this."
     not_premium_user = "You must be a LiteLLM Enterprise user to use this feature. If you have a license please set `LITELLM_LICENSE` in your env. If you want to obtain a license meet with us here: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat"
+
+
+class SpendCalculateRequest(LiteLLMBase):
+    model: Optional[str] = None
+    messages: Optional[List] = None
+    completion_response: Optional[dict] = None
diff --git a/litellm/proxy/spend_tracking/spend_management_endpoints.py b/litellm/proxy/spend_tracking/spend_management_endpoints.py
index 11edd1887..8089c7acb 100644
--- a/litellm/proxy/spend_tracking/spend_management_endpoints.py
+++ b/litellm/proxy/spend_tracking/spend_management_endpoints.py
@@ -1199,7 +1199,7 @@ async def _get_spend_report_for_time_range(
         }
     },
 )
-async def calculate_spend(request: Request):
+async def calculate_spend(request: SpendCalculateRequest):
     """
     Accepts all the params of completion_cost.
 
@@ -1248,14 +1248,75 @@ async def calculate_spend(request: Request):
     }'
     ```
     """
-    from litellm import completion_cost
+    try:
+        from litellm import completion_cost
+        from litellm.cost_calculator import CostPerToken
+        from litellm.proxy.proxy_server import llm_router
 
-    data = await request.json()
-    if "completion_response" in data:
-        data["completion_response"] = litellm.ModelResponse(
-            **data["completion_response"]
+        _cost = None
+        if request.model is not None:
+            if request.messages is None:
+                raise HTTPException(
+                    status_code=400,
+                    detail="Bad Request - messages must be provided if 'model' is provided",
+                )
+
+            # check if model in llm_router
+            _model_in_llm_router = None
+            cost_per_token: Optional[CostPerToken] = None
+            if llm_router is not None:
+                for model in llm_router.model_list:
+                    if model.get("model_name") == request.model:
+                        _model_in_llm_router = model
+
+            """
+            3 cases for /spend/calculate
+
+            1. user passes model, and model is defined on litellm config.yaml or in DB. use info on config or in DB in this case
+            2. user passes model, and model is not defined on litellm config.yaml or in DB. Pass model as is to litellm.completion_cost
+            3. user passes completion_response
+            
+            """
+            if _model_in_llm_router is not None:
+                _litellm_params = _model_in_llm_router.get("litellm_params")
+                _litellm_model_name = _litellm_params.get("model")
+                input_cost_per_token = _litellm_params.get("input_cost_per_token")
+                output_cost_per_token = _litellm_params.get("output_cost_per_token")
+                if (
+                    input_cost_per_token is not None
+                    or output_cost_per_token is not None
+                ):
+                    cost_per_token = CostPerToken(
+                        input_cost_per_token=input_cost_per_token,
+                        output_cost_per_token=output_cost_per_token,
+                    )
+
+                _cost = completion_cost(
+                    model=_litellm_model_name,
+                    messages=request.messages,
+                    custom_cost_per_token=cost_per_token,
+                )
+            else:
+                _cost = completion_cost(model=request.model, messages=request.messages)
+        else:
+            _completion_response = litellm.ModelResponse(request.completion_response)
+            _cost = completion_cost(completion_response=_completion_response)
+        return {"cost": _cost}
+    except Exception as e:
+        if isinstance(e, HTTPException):
+            raise ProxyException(
+                message=getattr(e, "detail", str(e)),
+                type=getattr(e, "type", "None"),
+                param=getattr(e, "param", "None"),
+                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
+            )
+        error_msg = f"{str(e)}"
+        raise ProxyException(
+            message=getattr(e, "message", error_msg),
+            type=getattr(e, "type", "None"),
+            param=getattr(e, "param", "None"),
+            code=getattr(e, "status_code", 500),
         )
-    return {"cost": completion_cost(**data)}
 
 
 @router.get(

From 556ef8dd184c17428b0ab53b9cbdc708f41cf7ee Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 19:32:52 -0700
Subject: [PATCH 045/150] test - spend/calculate endpoints

---
 .../spend_management_endpoints.py             |   9 +-
 .../tests/test_spend_calculate_endpoint.py    | 103 ++++++++++++++++++
 2 files changed, 110 insertions(+), 2 deletions(-)
 create mode 100644 litellm/tests/test_spend_calculate_endpoint.py

diff --git a/litellm/proxy/spend_tracking/spend_management_endpoints.py b/litellm/proxy/spend_tracking/spend_management_endpoints.py
index 8089c7acb..abbdc3419 100644
--- a/litellm/proxy/spend_tracking/spend_management_endpoints.py
+++ b/litellm/proxy/spend_tracking/spend_management_endpoints.py
@@ -1298,9 +1298,14 @@ async def calculate_spend(request: SpendCalculateRequest):
                 )
             else:
                 _cost = completion_cost(model=request.model, messages=request.messages)
-        else:
-            _completion_response = litellm.ModelResponse(request.completion_response)
+        elif request.completion_response is not None:
+            _completion_response = litellm.ModelResponse(**request.completion_response)
             _cost = completion_cost(completion_response=_completion_response)
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail="Bad Request - Either 'model' or 'completion_response' must be provided",
+            )
         return {"cost": _cost}
     except Exception as e:
         if isinstance(e, HTTPException):
diff --git a/litellm/tests/test_spend_calculate_endpoint.py b/litellm/tests/test_spend_calculate_endpoint.py
new file mode 100644
index 000000000..f8aff337e
--- /dev/null
+++ b/litellm/tests/test_spend_calculate_endpoint.py
@@ -0,0 +1,103 @@
+import os
+import sys
+
+import pytest
+from dotenv import load_dotenv
+from fastapi import Request
+from fastapi.routing import APIRoute
+
+import litellm
+from litellm.proxy._types import SpendCalculateRequest
+from litellm.proxy.spend_tracking.spend_management_endpoints import calculate_spend
+from litellm.router import Router
+
+# this file is to test litellm/proxy
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+
+@pytest.mark.asyncio
+async def test_spend_calc_model_messages():
+    cost_obj = await calculate_spend(
+        request=SpendCalculateRequest(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "user", "content": "What is the capital of France?"},
+            ],
+        )
+    )
+
+    print("calculated cost", cost_obj)
+    cost = cost_obj["cost"]
+    assert cost > 0.0
+
+
+@pytest.mark.asyncio
+async def test_spend_calc_model_on_router_messages():
+    from litellm.proxy.proxy_server import llm_router as init_llm_router
+
+    temp_llm_router = Router(
+        model_list=[
+            {
+                "model_name": "special-llama-model",
+                "litellm_params": {
+                    "model": "groq/llama3-8b-8192",
+                },
+            }
+        ]
+    )
+
+    setattr(litellm.proxy.proxy_server, "llm_router", temp_llm_router)
+
+    cost_obj = await calculate_spend(
+        request=SpendCalculateRequest(
+            model="special-llama-model",
+            messages=[
+                {"role": "user", "content": "What is the capital of France?"},
+            ],
+        )
+    )
+
+    print("calculated cost", cost_obj)
+    _cost = cost_obj["cost"]
+
+    assert _cost > 0.0
+
+    # set router to init value
+    setattr(litellm.proxy.proxy_server, "llm_router", init_llm_router)
+
+
+@pytest.mark.asyncio
+async def test_spend_calc_using_response():
+    cost_obj = await calculate_spend(
+        request=SpendCalculateRequest(
+            completion_response={
+                "id": "chatcmpl-3bc7abcd-f70b-48ab-a16c-dfba0b286c86",
+                "choices": [
+                    {
+                        "finish_reason": "stop",
+                        "index": 0,
+                        "message": {
+                            "content": "Yooo! What's good?",
+                            "role": "assistant",
+                        },
+                    }
+                ],
+                "created": "1677652288",
+                "model": "groq/llama3-8b-8192",
+                "object": "chat.completion",
+                "system_fingerprint": "fp_873a560973",
+                "usage": {
+                    "completion_tokens": 8,
+                    "prompt_tokens": 12,
+                    "total_tokens": 20,
+                },
+            }
+        )
+    )
+
+    print("calculated cost", cost_obj)
+    cost = cost_obj["cost"]
+    assert cost > 0.0

From fa5eb5c96fff342385fb55a479dc58a37f529682 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 19:50:35 -0700
Subject: [PATCH 046/150] add helper to check route_in_additonal_public_routes

---
 litellm/proxy/auth/auth_utils.py        | 42 +++++++++++++++++++++++++
 litellm/proxy/auth/user_api_key_auth.py |  6 ++--
 2 files changed, 46 insertions(+), 2 deletions(-)
 create mode 100644 litellm/proxy/auth/auth_utils.py

diff --git a/litellm/proxy/auth/auth_utils.py b/litellm/proxy/auth/auth_utils.py
new file mode 100644
index 000000000..60e59a584
--- /dev/null
+++ b/litellm/proxy/auth/auth_utils.py
@@ -0,0 +1,42 @@
+from litellm._logging import verbose_proxy_logger
+from litellm.proxy._types import LiteLLMRoutes
+from litellm.proxy.proxy_server import general_settings, premium_user
+
+
+def route_in_additonal_public_routes(current_route: str):
+    """
+    Helper to check if the user defined public_routes on config.yaml
+
+    Parameters:
+    - current_route: str - the route the user is trying to call
+
+    Returns:
+    - bool - True if the route is defined in public_routes
+    - bool - False if the route is not defined in public_routes
+
+
+    In order to use this the litellm config.yaml should have the following in general_settings:
+
+    ```yaml
+    general_settings:
+        master_key: sk-1234
+        public_routes: ["LiteLLMRoutes.public_routes", "/spend/calculate"]
+    ```
+    """
+
+    # check if user is premium_user - if not do nothing
+    try:
+        if premium_user is not True:
+            return False
+        # check if this is defined on the config
+        if general_settings is None:
+            return False
+
+        routes_defined = general_settings.get("public_routes", [])
+        if current_route in routes_defined:
+            return True
+
+        return False
+    except Exception as e:
+        verbose_proxy_logger.error(f"route_in_additonal_public_routes: {str(e)}")
+        return False
diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py
index f6e3a0dfe..d3e937734 100644
--- a/litellm/proxy/auth/user_api_key_auth.py
+++ b/litellm/proxy/auth/user_api_key_auth.py
@@ -56,6 +56,7 @@ from litellm.proxy.auth.auth_checks import (
     get_user_object,
     log_to_opentelemetry,
 )
+from litellm.proxy.auth.auth_utils import route_in_additonal_public_routes
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
 from litellm.proxy.utils import _to_ns
 
@@ -137,8 +138,9 @@ async def user_api_key_auth(
         """
         route: str = request.url.path
 
-        if route in LiteLLMRoutes.public_routes.value or route in general_settings.get(
-            "public_routes", []
+        if (
+            route in LiteLLMRoutes.public_routes.value
+            or route_in_additonal_public_routes(current_route=route)
         ):
             # check if public endpoint
             return UserAPIKeyAuth(user_role=LitellmUserRoles.INTERNAL_USER_VIEW_ONLY)

From e7ff5014b148b4326689b537c4007899b13f0c07 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 19:51:23 -0700
Subject: [PATCH 047/150] example cofnig with public routes

---
 litellm/proxy/proxy_config.yaml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 8898dd8cb..caa6bc13b 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -21,9 +21,8 @@ model_list:
 general_settings:
   master_key: sk-1234
   alerting: ["slack", "email"]
-  public_routes: [
-    "/spend/calculate",
-  ]
+  public_routes: ["LiteLLMRoutes.public_routes", "/spend/calculate"]
+
 
 litellm_settings:
   success_callback: ["prometheus"]

From 412c1fc54111e5629aebdb407384146da97194e3 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 19:58:53 -0700
Subject: [PATCH 048/150] fix importing litellm

---
 litellm/proxy/auth/auth_utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/litellm/proxy/auth/auth_utils.py b/litellm/proxy/auth/auth_utils.py
index 60e59a584..cc09a9689 100644
--- a/litellm/proxy/auth/auth_utils.py
+++ b/litellm/proxy/auth/auth_utils.py
@@ -1,6 +1,4 @@
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import LiteLLMRoutes
-from litellm.proxy.proxy_server import general_settings, premium_user
 
 
 def route_in_additonal_public_routes(current_route: str):
@@ -25,6 +23,9 @@ def route_in_additonal_public_routes(current_route: str):
     """
 
     # check if user is premium_user - if not do nothing
+    from litellm.proxy._types import LiteLLMRoutes
+    from litellm.proxy.proxy_server import general_settings, premium_user
+
     try:
         if premium_user is not True:
             return False

From 7a961e8a0bcdf7662385086b2df875c9948d5b85 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 20:54:43 -0700
Subject: [PATCH 049/150] docs control available public routes

---
 docs/my-website/docs/enterprise.md       |  1 +
 docs/my-website/docs/proxy/enterprise.md | 43 ++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/docs/my-website/docs/enterprise.md b/docs/my-website/docs/enterprise.md
index 0edf937ed..2d45ea3ea 100644
--- a/docs/my-website/docs/enterprise.md
+++ b/docs/my-website/docs/enterprise.md
@@ -12,6 +12,7 @@ This covers:
 - ✅ [**Secure UI access with Single Sign-On**](../docs/proxy/ui.md#setup-ssoauth-for-ui)
 - ✅ [**Audit Logs with retention policy**](../docs/proxy/enterprise.md#audit-logs)
 - ✅ [**JWT-Auth**](../docs/proxy/token_auth.md)
+- ✅ [**Control available public, private routes**](../docs/proxy/enterprise.md#control-available-public-private-routes)
 - ✅ [**Prompt Injection Detection**](#prompt-injection-detection-lakeraai)
 - ✅ [**Invite Team Members to access `/spend` Routes**](../docs/proxy/cost_tracking#allowing-non-proxy-admins-to-access-spend-endpoints)
 - ✅ **Feature Prioritization**
diff --git a/docs/my-website/docs/proxy/enterprise.md b/docs/my-website/docs/proxy/enterprise.md
index e657d3b73..40a5261cd 100644
--- a/docs/my-website/docs/proxy/enterprise.md
+++ b/docs/my-website/docs/proxy/enterprise.md
@@ -14,6 +14,7 @@ Features:
 - ✅ [SSO for Admin UI](./ui.md#✨-enterprise-features)
 - ✅ [Audit Logs](#audit-logs)
 - ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags)
+- ✅ [Control available public, private routes](#control-available-public-private-routes)
 - ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests)
 - ✅ [Content Moderation with LLM Guard, LlamaGuard, Google Text Moderations](#content-moderation)
 - ✅ [Prompt Injection Detection (with LakeraAI API)](#prompt-injection-detection---lakeraai)
@@ -448,6 +449,48 @@ Expected Response
 
 
 
+## Control available public, private routes
+
+:::info
+
+❓ Use this when you want to make an existing private route -> public
+
+Example - Make `/spend/calculate` a publicly available route (by default `/spend/calculate` on LiteLLM Proxy requires authentication)
+
+:::
+
+#### Usage - Define public routes
+
+**Step 1** - set allowed public routes on config.yaml 
+
+`LiteLLMRoutes.public_routes` is an ENUM corresponding to the default public routes on LiteLLM. [You can see this here](https://github.com/BerriAI/litellm/blob/main/litellm/proxy/_types.py)
+
+```yaml
+general_settings:
+  master_key: sk-1234
+  public_routes: ["LiteLLMRoutes.public_routes", "/spend/calculate"]
+```
+
+**Step 2** - start proxy 
+
+```shell
+litellm --config config.yaml
+```
+
+**Step 3** - Test it 
+
+```shell
+curl --request POST \
+  --url 'http://localhost:4000/spend/calculate' \
+  --header 'Content-Type: application/json' \
+  --data '{
+    "model": "gpt-4",
+    "messages": [{"role": "user", "content": "Hey, how'\''s it going?"}]
+  }'
+```
+
+🎉 Expect this endpoint to work without an `Authorization / Bearer Token`
+
 
 
 

From 690c7f6e477491a9192170d2dffec576c5371076 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon, 24 Jun 2024 17:52:01 -0700
Subject: [PATCH 050/150] fix(router.py): log rejected router requests to
 langfuse

Fixes issue where rejected requests weren't being logged
---
 .gitignore                            |   1 +
 litellm/integrations/langfuse.py      |  38 ++--
 litellm/proxy/_new_secret_config.yaml |   4 +
 litellm/router.py                     | 262 ++++++++++++++------------
 4 files changed, 167 insertions(+), 138 deletions(-)

diff --git a/.gitignore b/.gitignore
index b633e1d3d..8a9095b84 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,3 +61,4 @@ litellm/proxy/_experimental/out/model_hub/index.html
 litellm/proxy/_experimental/out/onboarding/index.html
 litellm/tests/log.txt
 litellm/tests/langfuse.log
+litellm/tests/langfuse.log
diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py
index eae8b8e22..794524684 100644
--- a/litellm/integrations/langfuse.py
+++ b/litellm/integrations/langfuse.py
@@ -36,9 +36,9 @@ class LangFuseLogger:
         self.langfuse_debug = os.getenv("LANGFUSE_DEBUG")
 
         parameters = {
-            "public_key": self.public_key,
-            "secret_key": self.secret_key,
-            "host": self.langfuse_host,
+            "public_key": "pk-lf-a65841e9-5192-4397-a679-cfff029fd5b0",
+            "secret_key": "sk-lf-d58c2891-3717-4f98-89dd-df44826215fd",
+            "host": "https://us.cloud.langfuse.com",
             "release": self.langfuse_release,
             "debug": self.langfuse_debug,
             "flush_interval": flush_interval,  # flush interval in seconds
@@ -311,22 +311,22 @@ class LangFuseLogger:
 
         try:
             tags = []
-            try:
-                metadata = copy.deepcopy(
-                    metadata
-                )  # Avoid modifying the original metadata
-            except:
-                new_metadata = {}
-                for key, value in metadata.items():
-                    if (
-                        isinstance(value, list)
-                        or isinstance(value, dict)
-                        or isinstance(value, str)
-                        or isinstance(value, int)
-                        or isinstance(value, float)
-                    ):
-                        new_metadata[key] = copy.deepcopy(value)
-                metadata = new_metadata
+            # try:
+            #     metadata = copy.deepcopy(
+            #         metadata
+            #     )  # Avoid modifying the original metadata
+            # except:
+            new_metadata = {}
+            for key, value in metadata.items():
+                if (
+                    isinstance(value, list)
+                    or isinstance(value, dict)
+                    or isinstance(value, str)
+                    or isinstance(value, int)
+                    or isinstance(value, float)
+                ):
+                    new_metadata[key] = copy.deepcopy(value)
+            metadata = new_metadata
 
             supports_tags = Version(langfuse.version.__version__) >= Version("2.6.3")
             supports_prompt = Version(langfuse.version.__version__) >= Version("2.7.3")
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 78d7dc70c..16436c0ef 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -18,3 +18,7 @@ model_list:
 
 router_settings:
   enable_pre_call_checks: True
+
+
+litellm_settings:
+  failure_callback: ["langfuse"]
\ No newline at end of file
diff --git a/litellm/router.py b/litellm/router.py
index 6163da487..30bdbcba2 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -4474,17 +4474,13 @@ class Router:
                 raise ValueError(
                     f"{RouterErrors.no_deployments_available.value}, Try again in {self.cooldown_time} seconds. Passed model={model}. Try again in {self.cooldown_time} seconds."
                 )
-            elif _context_window_error == True:
+            elif _context_window_error is True:
                 raise litellm.ContextWindowExceededError(
                     message="litellm._pre_call_checks: Context Window exceeded for given call. No models have context window large enough for this call.\n{}".format(
                         _potential_error_str
                     ),
                     model=model,
                     llm_provider="",
-                    response=httpx.Response(
-                        status_code=400,
-                        request=httpx.Request("GET", "https://example.com"),
-                    ),
                 )
         if len(invalid_model_indices) > 0:
             for idx in reversed(invalid_model_indices):
@@ -4596,127 +4592,155 @@ class Router:
                 specific_deployment=specific_deployment,
                 request_kwargs=request_kwargs,
             )
-
-        model, healthy_deployments = self._common_checks_available_deployment(
-            model=model,
-            messages=messages,
-            input=input,
-            specific_deployment=specific_deployment,
-        )  # type: ignore
-
-        if isinstance(healthy_deployments, dict):
-            return healthy_deployments
-
-        # filter out the deployments currently cooling down
-        deployments_to_remove = []
-        # cooldown_deployments is a list of model_id's cooling down, cooldown_deployments = ["16700539-b3cd-42f4-b426-6a12a1bb706a", "16700539-b3cd-42f4-b426-7899"]
-        cooldown_deployments = await self._async_get_cooldown_deployments()
-        verbose_router_logger.debug(
-            f"async cooldown deployments: {cooldown_deployments}"
-        )
-        # Find deployments in model_list whose model_id is cooling down
-        for deployment in healthy_deployments:
-            deployment_id = deployment["model_info"]["id"]
-            if deployment_id in cooldown_deployments:
-                deployments_to_remove.append(deployment)
-        # remove unhealthy deployments from healthy deployments
-        for deployment in deployments_to_remove:
-            healthy_deployments.remove(deployment)
-
-        # filter pre-call checks
-        _allowed_model_region = (
-            request_kwargs.get("allowed_model_region")
-            if request_kwargs is not None
-            else None
-        )
-
-        if self.enable_pre_call_checks and messages is not None:
-            healthy_deployments = self._pre_call_checks(
+        try:
+            model, healthy_deployments = self._common_checks_available_deployment(
                 model=model,
-                healthy_deployments=healthy_deployments,
-                messages=messages,
-                request_kwargs=request_kwargs,
-            )
-
-        if len(healthy_deployments) == 0:
-            if _allowed_model_region is None:
-                _allowed_model_region = "n/a"
-            raise ValueError(
-                f"{RouterErrors.no_deployments_available.value}, Try again in {self.cooldown_time} seconds. Passed model={model}. pre-call-checks={self.enable_pre_call_checks}, allowed_model_region={_allowed_model_region}"
-            )
-
-        if (
-            self.routing_strategy == "usage-based-routing-v2"
-            and self.lowesttpm_logger_v2 is not None
-        ):
-            deployment = await self.lowesttpm_logger_v2.async_get_available_deployments(
-                model_group=model,
-                healthy_deployments=healthy_deployments,  # type: ignore
                 messages=messages,
                 input=input,
-            )
-        if (
-            self.routing_strategy == "cost-based-routing"
-            and self.lowestcost_logger is not None
-        ):
-            deployment = await self.lowestcost_logger.async_get_available_deployments(
-                model_group=model,
-                healthy_deployments=healthy_deployments,  # type: ignore
-                messages=messages,
-                input=input,
-            )
-        elif self.routing_strategy == "simple-shuffle":
-            # if users pass rpm or tpm, we do a random weighted pick - based on rpm/tpm
-            ############## Check if we can do a RPM/TPM based weighted pick #################
-            rpm = healthy_deployments[0].get("litellm_params").get("rpm", None)
-            if rpm is not None:
-                # use weight-random pick if rpms provided
-                rpms = [m["litellm_params"].get("rpm", 0) for m in healthy_deployments]
-                verbose_router_logger.debug(f"\nrpms {rpms}")
-                total_rpm = sum(rpms)
-                weights = [rpm / total_rpm for rpm in rpms]
-                verbose_router_logger.debug(f"\n weights {weights}")
-                # Perform weighted random pick
-                selected_index = random.choices(range(len(rpms)), weights=weights)[0]
-                verbose_router_logger.debug(f"\n selected index, {selected_index}")
-                deployment = healthy_deployments[selected_index]
-                verbose_router_logger.info(
-                    f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}"
-                )
-                return deployment or deployment[0]
-            ############## Check if we can do a RPM/TPM based weighted pick #################
-            tpm = healthy_deployments[0].get("litellm_params").get("tpm", None)
-            if tpm is not None:
-                # use weight-random pick if rpms provided
-                tpms = [m["litellm_params"].get("tpm", 0) for m in healthy_deployments]
-                verbose_router_logger.debug(f"\ntpms {tpms}")
-                total_tpm = sum(tpms)
-                weights = [tpm / total_tpm for tpm in tpms]
-                verbose_router_logger.debug(f"\n weights {weights}")
-                # Perform weighted random pick
-                selected_index = random.choices(range(len(tpms)), weights=weights)[0]
-                verbose_router_logger.debug(f"\n selected index, {selected_index}")
-                deployment = healthy_deployments[selected_index]
-                verbose_router_logger.info(
-                    f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}"
-                )
-                return deployment or deployment[0]
+                specific_deployment=specific_deployment,
+            )  # type: ignore
 
-            ############## No RPM/TPM passed, we do a random pick #################
-            item = random.choice(healthy_deployments)
-            return item or item[0]
-        if deployment is None:
+            if isinstance(healthy_deployments, dict):
+                return healthy_deployments
+
+            # filter out the deployments currently cooling down
+            deployments_to_remove = []
+            # cooldown_deployments is a list of model_id's cooling down, cooldown_deployments = ["16700539-b3cd-42f4-b426-6a12a1bb706a", "16700539-b3cd-42f4-b426-7899"]
+            cooldown_deployments = await self._async_get_cooldown_deployments()
+            verbose_router_logger.debug(
+                f"async cooldown deployments: {cooldown_deployments}"
+            )
+            # Find deployments in model_list whose model_id is cooling down
+            for deployment in healthy_deployments:
+                deployment_id = deployment["model_info"]["id"]
+                if deployment_id in cooldown_deployments:
+                    deployments_to_remove.append(deployment)
+            # remove unhealthy deployments from healthy deployments
+            for deployment in deployments_to_remove:
+                healthy_deployments.remove(deployment)
+
+            # filter pre-call checks
+            _allowed_model_region = (
+                request_kwargs.get("allowed_model_region")
+                if request_kwargs is not None
+                else None
+            )
+
+            if self.enable_pre_call_checks and messages is not None:
+                healthy_deployments = self._pre_call_checks(
+                    model=model,
+                    healthy_deployments=healthy_deployments,
+                    messages=messages,
+                    request_kwargs=request_kwargs,
+                )
+
+            if len(healthy_deployments) == 0:
+                if _allowed_model_region is None:
+                    _allowed_model_region = "n/a"
+                raise ValueError(
+                    f"{RouterErrors.no_deployments_available.value}, Try again in {self.cooldown_time} seconds. Passed model={model}. pre-call-checks={self.enable_pre_call_checks}, allowed_model_region={_allowed_model_region}"
+                )
+
+            if (
+                self.routing_strategy == "usage-based-routing-v2"
+                and self.lowesttpm_logger_v2 is not None
+            ):
+                deployment = (
+                    await self.lowesttpm_logger_v2.async_get_available_deployments(
+                        model_group=model,
+                        healthy_deployments=healthy_deployments,  # type: ignore
+                        messages=messages,
+                        input=input,
+                    )
+                )
+            if (
+                self.routing_strategy == "cost-based-routing"
+                and self.lowestcost_logger is not None
+            ):
+                deployment = (
+                    await self.lowestcost_logger.async_get_available_deployments(
+                        model_group=model,
+                        healthy_deployments=healthy_deployments,  # type: ignore
+                        messages=messages,
+                        input=input,
+                    )
+                )
+            elif self.routing_strategy == "simple-shuffle":
+                # if users pass rpm or tpm, we do a random weighted pick - based on rpm/tpm
+                ############## Check if we can do a RPM/TPM based weighted pick #################
+                rpm = healthy_deployments[0].get("litellm_params").get("rpm", None)
+                if rpm is not None:
+                    # use weight-random pick if rpms provided
+                    rpms = [
+                        m["litellm_params"].get("rpm", 0) for m in healthy_deployments
+                    ]
+                    verbose_router_logger.debug(f"\nrpms {rpms}")
+                    total_rpm = sum(rpms)
+                    weights = [rpm / total_rpm for rpm in rpms]
+                    verbose_router_logger.debug(f"\n weights {weights}")
+                    # Perform weighted random pick
+                    selected_index = random.choices(range(len(rpms)), weights=weights)[
+                        0
+                    ]
+                    verbose_router_logger.debug(f"\n selected index, {selected_index}")
+                    deployment = healthy_deployments[selected_index]
+                    verbose_router_logger.info(
+                        f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}"
+                    )
+                    return deployment or deployment[0]
+                ############## Check if we can do a RPM/TPM based weighted pick #################
+                tpm = healthy_deployments[0].get("litellm_params").get("tpm", None)
+                if tpm is not None:
+                    # use weight-random pick if rpms provided
+                    tpms = [
+                        m["litellm_params"].get("tpm", 0) for m in healthy_deployments
+                    ]
+                    verbose_router_logger.debug(f"\ntpms {tpms}")
+                    total_tpm = sum(tpms)
+                    weights = [tpm / total_tpm for tpm in tpms]
+                    verbose_router_logger.debug(f"\n weights {weights}")
+                    # Perform weighted random pick
+                    selected_index = random.choices(range(len(tpms)), weights=weights)[
+                        0
+                    ]
+                    verbose_router_logger.debug(f"\n selected index, {selected_index}")
+                    deployment = healthy_deployments[selected_index]
+                    verbose_router_logger.info(
+                        f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}"
+                    )
+                    return deployment or deployment[0]
+
+                ############## No RPM/TPM passed, we do a random pick #################
+                item = random.choice(healthy_deployments)
+                return item or item[0]
+            if deployment is None:
+                verbose_router_logger.info(
+                    f"get_available_deployment for model: {model}, No deployment available"
+                )
+                raise ValueError(
+                    f"{RouterErrors.no_deployments_available.value}, Try again in {self.cooldown_time} seconds. Passed model={model}"
+                )
             verbose_router_logger.info(
-                f"get_available_deployment for model: {model}, No deployment available"
+                f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}"
             )
-            raise ValueError(
-                f"{RouterErrors.no_deployments_available.value}, Try again in {self.cooldown_time} seconds. Passed model={model}"
-            )
-        verbose_router_logger.info(
-            f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}"
-        )
 
-        return deployment
+            return deployment
+        except Exception as e:
+            traceback_exception = traceback.format_exc()
+            # if router rejects call -> log to langfuse/otel/etc.
+            if request_kwargs is not None:
+                logging_obj = request_kwargs.get("litellm_logging_obj", None)
+                if logging_obj is not None:
+                    ## LOGGING
+                    threading.Thread(
+                        target=logging_obj.failure_handler,
+                        args=(e, traceback_exception),
+                    ).start()  # log response
+                    # Handle any exceptions that might occur during streaming
+                    asyncio.create_task(
+                        logging_obj.async_failure_handler(e, traceback_exception)  # type: ignore
+                    )
+            raise e
 
     def get_available_deployment(
         self,

From 08f83e6392625e59e5d908529d12dc582cdb267a Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 21:13:01 -0700
Subject: [PATCH 051/150] test - aliases on /spend/calculate

---
 .../tests/test_spend_calculate_endpoint.py    | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/litellm/tests/test_spend_calculate_endpoint.py b/litellm/tests/test_spend_calculate_endpoint.py
index f8aff337e..8bdd4a54d 100644
--- a/litellm/tests/test_spend_calculate_endpoint.py
+++ b/litellm/tests/test_spend_calculate_endpoint.py
@@ -101,3 +101,41 @@ async def test_spend_calc_using_response():
     print("calculated cost", cost_obj)
     cost = cost_obj["cost"]
     assert cost > 0.0
+
+
+@pytest.mark.asyncio
+async def test_spend_calc_model_alias_on_router_messages():
+    from litellm.proxy.proxy_server import llm_router as init_llm_router
+
+    temp_llm_router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-4o",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                },
+            }
+        ],
+        model_group_alias={
+            "gpt4o": "gpt-4o",
+        },
+    )
+
+    setattr(litellm.proxy.proxy_server, "llm_router", temp_llm_router)
+
+    cost_obj = await calculate_spend(
+        request=SpendCalculateRequest(
+            model="gpt4o",
+            messages=[
+                {"role": "user", "content": "What is the capital of France?"},
+            ],
+        )
+    )
+
+    print("calculated cost", cost_obj)
+    _cost = cost_obj["cost"]
+
+    assert _cost > 0.0
+
+    # set router to init value
+    setattr(litellm.proxy.proxy_server, "llm_router", init_llm_router)

From c5020878e4b7d2eefd4cebfe056593fe3fed6d43 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 21:14:00 -0700
Subject: [PATCH 052/150] /spend/calculate use model aliases on this endpoint

---
 .../spend_management_endpoints.py             | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/litellm/proxy/spend_tracking/spend_management_endpoints.py b/litellm/proxy/spend_tracking/spend_management_endpoints.py
index abbdc3419..1fbd95b3c 100644
--- a/litellm/proxy/spend_tracking/spend_management_endpoints.py
+++ b/litellm/proxy/spend_tracking/spend_management_endpoints.py
@@ -1265,9 +1265,22 @@ async def calculate_spend(request: SpendCalculateRequest):
             _model_in_llm_router = None
             cost_per_token: Optional[CostPerToken] = None
             if llm_router is not None:
-                for model in llm_router.model_list:
-                    if model.get("model_name") == request.model:
-                        _model_in_llm_router = model
+                if (
+                    llm_router.model_group_alias is not None
+                    and request.model in llm_router.model_group_alias
+                ):
+                    # lookup alias in llm_router
+                    _model_group_name = llm_router.model_group_alias[request.model]
+                    for model in llm_router.model_list:
+                        if model.get("model_name") == _model_group_name:
+                            _model_in_llm_router = model
+
+                else:
+                    # no model_group aliases set -> try finding model in llm_router
+                    # find model in llm_router
+                    for model in llm_router.model_list:
+                        if model.get("model_name") == request.model:
+                            _model_in_llm_router = model
 
             """
             3 cases for /spend/calculate

From 4d7b2e578cb149f340f02a263b3fb2d12c66cad8 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Mon, 24 Jun 2024 21:15:36 -0700
Subject: [PATCH 053/150] =?UTF-8?q?bump:=20version=201.40.25=20=E2=86=92?=
 =?UTF-8?q?=201.40.26?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index fc3526dcc..6b4884b5b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.40.25"
+version = "1.40.26"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -90,7 +90,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.40.25"
+version = "1.40.26"
 version_files = [
     "pyproject.toml:^version"
 ]

From 673696222d16904add69308e460ff47e60360452 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon, 24 Jun 2024 21:43:40 -0700
Subject: [PATCH 054/150] fix(langfuse.py): cleanup

---
 litellm/integrations/langfuse.py | 38 ++++++++++++++++----------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py
index 794524684..eae8b8e22 100644
--- a/litellm/integrations/langfuse.py
+++ b/litellm/integrations/langfuse.py
@@ -36,9 +36,9 @@ class LangFuseLogger:
         self.langfuse_debug = os.getenv("LANGFUSE_DEBUG")
 
         parameters = {
-            "public_key": "pk-lf-a65841e9-5192-4397-a679-cfff029fd5b0",
-            "secret_key": "sk-lf-d58c2891-3717-4f98-89dd-df44826215fd",
-            "host": "https://us.cloud.langfuse.com",
+            "public_key": self.public_key,
+            "secret_key": self.secret_key,
+            "host": self.langfuse_host,
             "release": self.langfuse_release,
             "debug": self.langfuse_debug,
             "flush_interval": flush_interval,  # flush interval in seconds
@@ -311,22 +311,22 @@ class LangFuseLogger:
 
         try:
             tags = []
-            # try:
-            #     metadata = copy.deepcopy(
-            #         metadata
-            #     )  # Avoid modifying the original metadata
-            # except:
-            new_metadata = {}
-            for key, value in metadata.items():
-                if (
-                    isinstance(value, list)
-                    or isinstance(value, dict)
-                    or isinstance(value, str)
-                    or isinstance(value, int)
-                    or isinstance(value, float)
-                ):
-                    new_metadata[key] = copy.deepcopy(value)
-            metadata = new_metadata
+            try:
+                metadata = copy.deepcopy(
+                    metadata
+                )  # Avoid modifying the original metadata
+            except:
+                new_metadata = {}
+                for key, value in metadata.items():
+                    if (
+                        isinstance(value, list)
+                        or isinstance(value, dict)
+                        or isinstance(value, str)
+                        or isinstance(value, int)
+                        or isinstance(value, float)
+                    ):
+                        new_metadata[key] = copy.deepcopy(value)
+                metadata = new_metadata
 
             supports_tags = Version(langfuse.version.__version__) >= Version("2.6.3")
             supports_prompt = Version(langfuse.version.__version__) >= Version("2.7.3")

From bb64c68daf6f9fcb666093b16d626945dbbc1f96 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon, 24 Jun 2024 22:25:39 -0700
Subject: [PATCH 055/150] docs(routing.md): add quickstart

---
 docs/my-website/docs/routing.md | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md
index fd4fb8658..de0a4a796 100644
--- a/docs/my-website/docs/routing.md
+++ b/docs/my-website/docs/routing.md
@@ -901,6 +901,39 @@ response = await router.acompletion(
 
 If a call fails after num_retries, fall back to another model group. 
 
+### Quick Start 
+
+```python
+from litellm import Router 
+router = Router(
+	model_list=[
+		{ # bad model
+			"model_name": "bad-model",
+			"litellm_params": {
+				"model": "openai/my-bad-model",
+				"api_key": "my-bad-api-key",
+				"mock_response": "Bad call"
+			},
+		},
+		{ # good model
+			"model_name": "my-good-model",
+			"litellm_params": {
+				"model": "gpt-4o",
+				"api_key": os.getenv("OPENAI_API_KEY"),
+				"mock_response": "Good call"
+			},
+		},
+	],
+	fallbacks=[{"bad-model": ["my-good-model"]}] # 👈 KEY CHANGE
+)
+
+response = router.completion(
+	model="bad-model",
+	messages=[{"role": "user", "content": "Hey, how's it going?"}],
+	mock_testing_fallbacks=True,
+)
+```
+
 If the error is a context window exceeded error, fall back to a larger model group (if given). 
 
 Fallbacks are done in-order - ["gpt-3.5-turbo, "gpt-4", "gpt-4-32k"], will do 'gpt-3.5-turbo' first, then 'gpt-4', etc.

From e1e8f5a00e868737c5a5fcb2962cc141ad520f28 Mon Sep 17 00:00:00 2001
From: corrm <islamnofl.official@gmail.com>
Date: Mon, 24 Jun 2024 05:54:58 +0300
Subject: [PATCH 056/150] chore: Improved prompt generation in ollama_pt
 function

---
 litellm/llms/prompt_templates/factory.py | 25 +++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index 398e96af7..02ed93fae 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -172,14 +172,21 @@ def ollama_pt(
                             images.append(base64_image)
         return {"prompt": prompt, "images": images}
     else:
-        prompt = "".join(
-            (
-                m["content"]
-                if isinstance(m["content"], str) is str
-                else "".join(m["content"])
-            )
-            for m in messages
-        )
+        prompt = ""
+        for message in messages:
+            role = message["role"]
+            content = message.get("content", "")
+
+            if "tool_calls" in message:
+                for call in message["tool_calls"]:
+                    function_name = call["function"]["name"]
+                    arguments = json.loads(call["function"]["arguments"])
+                    prompt += f"### Tool Call ({call["id"]}):\nFunction: {function_name}\nArguments: {json.dumps(arguments)}\n\n"
+            elif "tool_call_id" in message:
+                prompt += f"### Tool Call Result ({message["tool_call_id"]}):\n{message["content"]}\n\n"
+            elif content:
+                prompt += f"### {role.capitalize()}:\n{content}\n\n"
+
     return prompt
 
 
@@ -710,7 +717,7 @@ def convert_to_anthropic_tool_result_xml(message: dict) -> str:
 
     """
     Anthropic tool_results look like:
-    
+
     [Successful results]
     <function_results>
     <result>

From 5d5227294dd3bc33931324f979fe726cfca0b661 Mon Sep 17 00:00:00 2001
From: corrm <islamnofl.official@gmail.com>
Date: Mon, 24 Jun 2024 05:55:22 +0300
Subject: [PATCH 057/150] chore: Improved OllamaConfig get_required_params and
 ollama_acompletion and ollama_async_streaming functions

---
 litellm/llms/ollama.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py
index e7dd1d5f5..1939715b3 100644
--- a/litellm/llms/ollama.py
+++ b/litellm/llms/ollama.py
@@ -126,7 +126,7 @@ class OllamaConfig:
             )
             and v is not None
         }
-    
+
     def get_required_params(self) -> List[ProviderField]:
         """For a given provider, return it's required fields with a description"""
         return [
@@ -451,7 +451,7 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
                         {
                             "id": f"call_{str(uuid.uuid4())}",
                             "function": {
-                                "name": function_call["name"],
+                                "name": function_call.get("name", function_call.get("function", None)),
                                 "arguments": json.dumps(function_call["arguments"]),
                             },
                             "type": "function",

From a7efb9c332620147efb6d0730e76b5a82cd89f1a Mon Sep 17 00:00:00 2001
From: corrm <islamnofl.official@gmail.com>
Date: Mon, 24 Jun 2024 05:56:56 +0300
Subject: [PATCH 058/150] Added improved function name handling in
 ollama_async_streaming

---
 litellm/llms/ollama_chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py
index a7439bbcc..af6fd5b80 100644
--- a/litellm/llms/ollama_chat.py
+++ b/litellm/llms/ollama_chat.py
@@ -434,7 +434,7 @@ async def ollama_async_streaming(
                         {
                             "id": f"call_{str(uuid.uuid4())}",
                             "function": {
-                                "name": function_call["name"],
+                                "name": function_call.get("name", function_call.get("function", None)),
                                 "arguments": json.dumps(function_call["arguments"]),
                             },
                             "type": "function",

From cc261580ac29754d0ea738b038c4a31c157038fa Mon Sep 17 00:00:00 2001
From: Islam Nofl <islamnofl.official@gmail.com>
Date: Mon, 24 Jun 2024 08:01:15 +0300
Subject: [PATCH 059/150] Rename ollama prompt 'Function' word to 'Name'

---
 litellm/llms/prompt_templates/factory.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index 02ed93fae..109c5b8d8 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -181,7 +181,7 @@ def ollama_pt(
                 for call in message["tool_calls"]:
                     function_name = call["function"]["name"]
                     arguments = json.loads(call["function"]["arguments"])
-                    prompt += f"### Tool Call ({call["id"]}):\nFunction: {function_name}\nArguments: {json.dumps(arguments)}\n\n"
+                    prompt += f"### Tool Call ({call["id"]}):\nName: {function_name}\nArguments: {json.dumps(arguments)}\n\n"
             elif "tool_call_id" in message:
                 prompt += f"### Tool Call Result ({message["tool_call_id"]}):\n{message["content"]}\n\n"
             elif content:

From ec36dd40d757f695238fe7fa0c0da690da686b81 Mon Sep 17 00:00:00 2001
From: corrm <islamnofl.official@gmail.com>
Date: Tue, 25 Jun 2024 12:40:07 +0300
Subject: [PATCH 060/150] Rename ollama prompt: - 'Function' word to
 'FunctionName' - 'Tool Call' to `FunctionCall` - 'Tool Call Result' to
 'FunctionCall Result'

_I found that changes make some models better_
---
 litellm/llms/prompt_templates/factory.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index 109c5b8d8..7864d5ebc 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -181,9 +181,9 @@ def ollama_pt(
                 for call in message["tool_calls"]:
                     function_name = call["function"]["name"]
                     arguments = json.loads(call["function"]["arguments"])
-                    prompt += f"### Tool Call ({call["id"]}):\nName: {function_name}\nArguments: {json.dumps(arguments)}\n\n"
+                    prompt += f"### FunctionCall ({call["id"]}):\nFunctionName: {function_name}\nArguments: {json.dumps(arguments)}\n\n"
             elif "tool_call_id" in message:
-                prompt += f"### Tool Call Result ({message["tool_call_id"]}):\n{message["content"]}\n\n"
+                prompt += f"### FunctionCall Result ({message["tool_call_id"]}):\n{message["content"]}\n\n"
             elif content:
                 prompt += f"### {role.capitalize()}:\n{content}\n\n"
 

From 2e7b0096d01152663c5d16be926ddf50b583a1e2 Mon Sep 17 00:00:00 2001
From: corrm <islamnofl.official@gmail.com>
Date: Tue, 25 Jun 2024 13:53:27 +0300
Subject: [PATCH 061/150] Improve ollama prompt: this formula give good result
 with AutoGen

---
 litellm/llms/prompt_templates/factory.py | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index 7864d5ebc..e359d36f4 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -135,7 +135,7 @@ def convert_to_ollama_image(openai_image_url: str):
 
 
 def ollama_pt(
-    model, messages
+        model, messages
 ):  # https://github.com/ollama/ollama/blob/af4cf55884ac54b9e637cd71dadfe9b7a5685877/docs/modelfile.md#template
     if "instruct" in model:
         prompt = custom_prompt(
@@ -178,12 +178,27 @@ def ollama_pt(
             content = message.get("content", "")
 
             if "tool_calls" in message:
+                tool_calls = []
+
                 for call in message["tool_calls"]:
-                    function_name = call["function"]["name"]
+                    call_id: str = call["id"]
+                    function_name: str = call["function"]["name"]
                     arguments = json.loads(call["function"]["arguments"])
-                    prompt += f"### FunctionCall ({call["id"]}):\nFunctionName: {function_name}\nArguments: {json.dumps(arguments)}\n\n"
+
+                    tool_calls.append({
+                        "id": call_id,
+                        "type": "function",
+                        "function": {
+                            "name": function_name,
+                            "arguments": arguments
+                        }
+                    })
+
+                prompt += f"### Assistant:\nTool Calls: {json.dumps(tool_calls, indent=2)}\n\n"
+
             elif "tool_call_id" in message:
-                prompt += f"### FunctionCall Result ({message["tool_call_id"]}):\n{message["content"]}\n\n"
+                prompt += f"### User:\n{message["content"]}\n\n"
+
             elif content:
                 prompt += f"### {role.capitalize()}:\n{content}\n\n"
 

From 1946610efd0b2f5179b497d2a37ef9ab6e87e70f Mon Sep 17 00:00:00 2001
From: Kyrylo Yefimenko <kyrylo@cast.ai>
Date: Tue, 25 Jun 2024 16:36:40 +0100
Subject: [PATCH 062/150] Fix Groq prices

---
 model_prices_and_context_window.json | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index ef07d87cc..415d220f2 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -887,7 +887,7 @@
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
         "input_cost_per_token": 0.00000005,
-        "output_cost_per_token": 0.00000010,
+        "output_cost_per_token": 0.00000008,
         "litellm_provider": "groq",
         "mode": "chat",
         "supports_function_calling": true
@@ -906,8 +906,8 @@
         "max_tokens": 32768,
         "max_input_tokens": 32768,
         "max_output_tokens": 32768,
-        "input_cost_per_token": 0.00000027,
-        "output_cost_per_token": 0.00000027,
+        "input_cost_per_token": 0.00000024,
+        "output_cost_per_token": 0.00000024,
         "litellm_provider": "groq",
         "mode": "chat",
         "supports_function_calling": true
@@ -916,8 +916,8 @@
         "max_tokens": 8192,
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.00000010,
-        "output_cost_per_token": 0.00000010,
+        "input_cost_per_token": 0.00000007,
+        "output_cost_per_token": 0.00000007,
         "litellm_provider": "groq",
         "mode": "chat",
         "supports_function_calling": true

From 43e42389d740467e0683fd5d1b9a0266552241d6 Mon Sep 17 00:00:00 2001
From: Krish Dholakia <krrishdholakia@gmail.com>
Date: Tue, 25 Jun 2024 09:23:19 -0700
Subject: [PATCH 063/150] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 91b709442..ae354d1e3 100644
--- a/README.md
+++ b/README.md
@@ -48,6 +48,7 @@ Support for more providers. Missing a provider or LLM Platform, raise a [feature
 
 > [!IMPORTANT]
 > LiteLLM v1.0.0 now requires `openai>=1.0.0`. Migration guide [here](https://docs.litellm.ai/docs/migration)
+> LiteLLM v1.40.14+ now requires `pydantic>=2.0.0`. No changes required.
 
 <a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">
   <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>

From c18b8bb01173d845e473690ba7d7d3f2e4edfd99 Mon Sep 17 00:00:00 2001
From: Krish Dholakia <krrishdholakia@gmail.com>
Date: Tue, 25 Jun 2024 09:24:00 -0700
Subject: [PATCH 064/150] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ae354d1e3..6d26e92c2 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ Support for more providers. Missing a provider or LLM Platform, raise a [feature
 # Usage ([**Docs**](https://docs.litellm.ai/docs/))
 
 > [!IMPORTANT]
-> LiteLLM v1.0.0 now requires `openai>=1.0.0`. Migration guide [here](https://docs.litellm.ai/docs/migration)
+> LiteLLM v1.0.0 now requires `openai>=1.0.0`. Migration guide [here](https://docs.litellm.ai/docs/migration)  
 > LiteLLM v1.40.14+ now requires `pydantic>=2.0.0`. No changes required.
 
 <a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">

From e471aca81f45b3f7e1da6acab5d3c18d4b0121af Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 22 Jun 2024 16:12:42 -0700
Subject: [PATCH 065/150] feat -  add debug_utils

---
 litellm/proxy/common_utils/debug_utils.py | 27 +++++++++++++++++++++++
 litellm/proxy/proxy_server.py             |  2 ++
 2 files changed, 29 insertions(+)
 create mode 100644 litellm/proxy/common_utils/debug_utils.py

diff --git a/litellm/proxy/common_utils/debug_utils.py b/litellm/proxy/common_utils/debug_utils.py
new file mode 100644
index 000000000..dc77958a6
--- /dev/null
+++ b/litellm/proxy/common_utils/debug_utils.py
@@ -0,0 +1,27 @@
+# Start tracing memory allocations
+import os
+import tracemalloc
+
+from fastapi import APIRouter
+
+from litellm._logging import verbose_proxy_logger
+
+router = APIRouter()
+
+if os.environ.get("LITELLM_PROFILE", "false").lower() == "true":
+    tracemalloc.start()
+
+    @router.get("/memory-usage", include_in_schema=False)
+    async def memory_usage():
+        # Take a snapshot of the current memory usage
+        snapshot = tracemalloc.take_snapshot()
+        top_stats = snapshot.statistics("lineno")
+        verbose_proxy_logger.debug("TOP STATS: %s", top_stats)
+
+        # Get the top 50 memory usage lines
+        top_50 = top_stats[:50]
+        result = []
+        for stat in top_50:
+            result.append(f"{stat.traceback.format()}: {stat.size / 1024} KiB")
+
+        return {"top_50_memory_usage": result}
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index a702cecbd..59ad7ba92 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -140,6 +140,7 @@ from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
 
 ## Import All Misc routes here ##
 from litellm.proxy.caching_routes import router as caching_router
+from litellm.proxy.common_utils.debug_utils import router as debugging_endpoints_router
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
 from litellm.proxy.health_check import perform_health_check
 from litellm.proxy.health_endpoints._health_endpoints import router as health_router
@@ -9167,3 +9168,4 @@ app.include_router(team_router)
 app.include_router(spend_management_router)
 app.include_router(caching_router)
 app.include_router(analytics_router)
+app.include_router(debugging_endpoints_router)

From b55c31ec3fd5127e8595a3933d8eef0b87e4e48f Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 08:53:06 -0700
Subject: [PATCH 066/150] add nvidia nim to __init__

---
 litellm/__init__.py                      |  3 +++
 litellm/llms/prompt_templates/factory.py | 17 ++++++++---------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/litellm/__init__.py b/litellm/__init__.py
index f07ce8809..d23247d53 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -401,6 +401,7 @@ openai_compatible_endpoints: List = [
     "codestral.mistral.ai/v1/chat/completions",
     "codestral.mistral.ai/v1/fim/completions",
     "api.groq.com/openai/v1",
+    "https://integrate.api.nvidia.com/v1",
     "api.deepseek.com/v1",
     "api.together.xyz/v1",
     "inference.friendli.ai/v1",
@@ -411,6 +412,7 @@ openai_compatible_providers: List = [
     "anyscale",
     "mistral",
     "groq",
+    "nvidia_nim",
     "codestral",
     "deepseek",
     "deepinfra",
@@ -640,6 +642,7 @@ provider_list: List = [
     "anyscale",
     "mistral",
     "groq",
+    "nvidia_nim",
     "codestral",
     "text-completion-codestral",
     "deepseek",
diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index e359d36f4..a97d6812c 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -135,7 +135,7 @@ def convert_to_ollama_image(openai_image_url: str):
 
 
 def ollama_pt(
-        model, messages
+    model, messages
 ):  # https://github.com/ollama/ollama/blob/af4cf55884ac54b9e637cd71dadfe9b7a5685877/docs/modelfile.md#template
     if "instruct" in model:
         prompt = custom_prompt(
@@ -185,19 +185,18 @@ def ollama_pt(
                     function_name: str = call["function"]["name"]
                     arguments = json.loads(call["function"]["arguments"])
 
-                    tool_calls.append({
-                        "id": call_id,
-                        "type": "function",
-                        "function": {
-                            "name": function_name,
-                            "arguments": arguments
+                    tool_calls.append(
+                        {
+                            "id": call_id,
+                            "type": "function",
+                            "function": {"name": function_name, "arguments": arguments},
                         }
-                    })
+                    )
 
                 prompt += f"### Assistant:\nTool Calls: {json.dumps(tool_calls, indent=2)}\n\n"
 
             elif "tool_call_id" in message:
-                prompt += f"### User:\n{message["content"]}\n\n"
+                prompt += f"### User:\n{message['content']}\n\n"
 
             elif content:
                 prompt += f"### {role.capitalize()}:\n{content}\n\n"

From 11ea1474b179774557fc5b7311fa06797812b499 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 08:57:11 -0700
Subject: [PATCH 067/150] feat - add nvidia nim to main.py

---
 litellm/main.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/litellm/main.py b/litellm/main.py
index 307659c8a..8c531643b 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -348,6 +348,7 @@ async def acompletion(
             or custom_llm_provider == "deepinfra"
             or custom_llm_provider == "perplexity"
             or custom_llm_provider == "groq"
+            or custom_llm_provider == "nvidia_nim"
             or custom_llm_provider == "codestral"
             or custom_llm_provider == "text-completion-codestral"
             or custom_llm_provider == "deepseek"
@@ -1171,6 +1172,7 @@ def completion(
             or custom_llm_provider == "deepinfra"
             or custom_llm_provider == "perplexity"
             or custom_llm_provider == "groq"
+            or custom_llm_provider == "nvidia_nim"
             or custom_llm_provider == "codestral"
             or custom_llm_provider == "deepseek"
             or custom_llm_provider == "anyscale"
@@ -2932,6 +2934,7 @@ async def aembedding(*args, **kwargs) -> EmbeddingResponse:
             or custom_llm_provider == "deepinfra"
             or custom_llm_provider == "perplexity"
             or custom_llm_provider == "groq"
+            or custom_llm_provider == "nvidia_nim"
             or custom_llm_provider == "deepseek"
             or custom_llm_provider == "fireworks_ai"
             or custom_llm_provider == "ollama"
@@ -3507,6 +3510,7 @@ async def atext_completion(
             or custom_llm_provider == "deepinfra"
             or custom_llm_provider == "perplexity"
             or custom_llm_provider == "groq"
+            or custom_llm_provider == "nvidia_nim"
             or custom_llm_provider == "text-completion-codestral"
             or custom_llm_provider == "deepseek"
             or custom_llm_provider == "fireworks_ai"

From a9f17d141c4816094fd8ab7cb6bac07f05ab2b8a Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 09:13:08 -0700
Subject: [PATCH 068/150] feat - add param mapping for nvidia nim

---
 litellm/__init__.py        |  1 +
 litellm/llms/nvidia_nim.py | 79 ++++++++++++++++++++++++++++++++++++++
 litellm/utils.py           | 23 +++++++++++
 3 files changed, 103 insertions(+)
 create mode 100644 litellm/llms/nvidia_nim.py

diff --git a/litellm/__init__.py b/litellm/__init__.py
index d23247d53..08ee84aaa 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -816,6 +816,7 @@ from .llms.openai import (
     DeepInfraConfig,
     AzureAIStudioConfig,
 )
+from .llms.nvidia_nim import NvidiaNimConfig
 from .llms.text_completion_codestral import MistralTextCompletionConfig
 from .llms.azure import (
     AzureOpenAIConfig,
diff --git a/litellm/llms/nvidia_nim.py b/litellm/llms/nvidia_nim.py
new file mode 100644
index 000000000..ebcc84c13
--- /dev/null
+++ b/litellm/llms/nvidia_nim.py
@@ -0,0 +1,79 @@
+"""
+Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer 
+
+This is OpenAI compatible 
+
+This file only contains param mapping logic
+
+API calling is done using the OpenAI SDK with an api_base
+"""
+
+import types
+from typing import Optional, Union
+
+
+class NvidiaNimConfig:
+    """
+    Reference: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer
+
+    The class `NvidiaNimConfig` provides configuration for the Nvidia NIM's Chat Completions API interface. Below are the parameters:
+    """
+
+    temperature: Optional[int] = None
+    top_p: Optional[int] = None
+    frequency_penalty: Optional[int] = None
+    presence_penalty: Optional[int] = None
+    max_tokens: Optional[int] = None
+    stop: Optional[Union[str, list]] = None
+
+    def __init__(
+        self,
+        temperature: Optional[int] = None,
+        top_p: Optional[int] = None,
+        frequency_penalty: Optional[int] = None,
+        presence_penalty: Optional[int] = None,
+        max_tokens: Optional[int] = None,
+        stop: Optional[Union[str, list]] = None,
+    ) -> None:
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
+
+    def get_supported_openai_params(self):
+        return [
+            "stream",
+            "temperature",
+            "top_p",
+            "frequency_penalty",
+            "presence_penalty",
+            "max_tokens",
+            "stop",
+        ]
+
+    def map_openai_params(
+        self, non_default_params: dict, optional_params: dict
+    ) -> dict:
+        supported_openai_params = self.get_supported_openai_params()
+        for param, value in non_default_params.items():
+            if param in supported_openai_params:
+                optional_params[param] = value
+        return optional_params
diff --git a/litellm/utils.py b/litellm/utils.py
index 1bc8bf771..7709e8821 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2410,6 +2410,7 @@ def get_optional_params(
             and custom_llm_provider != "anyscale"
             and custom_llm_provider != "together_ai"
             and custom_llm_provider != "groq"
+            and custom_llm_provider != "nvidia_nim"
             and custom_llm_provider != "deepseek"
             and custom_llm_provider != "codestral"
             and custom_llm_provider != "mistral"
@@ -3060,6 +3061,14 @@ def get_optional_params(
         optional_params = litellm.DatabricksConfig().map_openai_params(
             non_default_params=non_default_params, optional_params=optional_params
         )
+    elif custom_llm_provider == "nvidia_nim":
+        supported_params = get_supported_openai_params(
+            model=model, custom_llm_provider=custom_llm_provider
+        )
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = litellm.NvidiaNimConfig().map_openai_params(
+            non_default_params=non_default_params, optional_params=optional_params
+        )
     elif custom_llm_provider == "groq":
         supported_params = get_supported_openai_params(
             model=model, custom_llm_provider=custom_llm_provider
@@ -3626,6 +3635,8 @@ def get_supported_openai_params(
         return litellm.OllamaChatConfig().get_supported_openai_params()
     elif custom_llm_provider == "anthropic":
         return litellm.AnthropicConfig().get_supported_openai_params()
+    elif custom_llm_provider == "nvidia_nim":
+        return litellm.NvidiaNimConfig().get_supported_openai_params()
     elif custom_llm_provider == "groq":
         return [
             "temperature",
@@ -3986,6 +3997,10 @@ def get_llm_provider(
                 # groq is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.groq.com/openai/v1
                 api_base = "https://api.groq.com/openai/v1"
                 dynamic_api_key = get_secret("GROQ_API_KEY")
+            elif custom_llm_provider == "nvidia_nim":
+                # nvidia_nim is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1
+                api_base = "https://integrate.api.nvidia.com/v1"
+                dynamic_api_key = get_secret("NVIDIA_NIM_API_KEY")
             elif custom_llm_provider == "codestral":
                 # codestral is openai compatible, we just need to set this to custom_openai and have the api_base be https://codestral.mistral.ai/v1
                 api_base = "https://codestral.mistral.ai/v1"
@@ -4087,6 +4102,9 @@ def get_llm_provider(
                     elif endpoint == "api.groq.com/openai/v1":
                         custom_llm_provider = "groq"
                         dynamic_api_key = get_secret("GROQ_API_KEY")
+                    elif endpoint == "https://integrate.api.nvidia.com/v1":
+                        custom_llm_provider = "nvidia_nim"
+                        dynamic_api_key = get_secret("NVIDIA_NIM_API_KEY")
                     elif endpoint == "https://codestral.mistral.ai/v1":
                         custom_llm_provider = "codestral"
                         dynamic_api_key = get_secret("CODESTRAL_API_KEY")
@@ -4900,6 +4918,11 @@ def validate_environment(model: Optional[str] = None) -> dict:
                 keys_in_environment = True
             else:
                 missing_keys.append("GROQ_API_KEY")
+        elif custom_llm_provider == "nvidia_nim":
+            if "NVIDIA_NIM_API_KEY" in os.environ:
+                keys_in_environment = True
+            else:
+                missing_keys.append("NVIDIA_NIM_API_KEY")
         elif (
             custom_llm_provider == "codestral"
             or custom_llm_provider == "text-completion-codestral"

From ee0d8341b0f04c829ff5e5ea78891b5d960adffc Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 09:16:31 -0700
Subject: [PATCH 069/150] test - nvidia nim

---
 litellm/tests/test_completion.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 830b3acd3..0c6da360b 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -3470,6 +3470,28 @@ def test_completion_deep_infra_mistral():
 # test_completion_deep_infra_mistral()
 
 
+def test_completion_nvidia_nim():
+    model_name = "nvidia_nim/databricks/dbrx-instruct"
+    try:
+        response = completion(
+            model=model_name,
+            messages=[
+                {
+                    "role": "user",
+                    "content": "What's the weather like in Boston today in Fahrenheit?",
+                }
+            ],
+        )
+        # Add any assertions here to check the response
+        print(response)
+        assert response.choices[0].message.content is not None
+        assert len(response.choices[0].message.content) > 0
+    except litellm.exceptions.Timeout as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 # Gemini tests
 @pytest.mark.parametrize(
     "model",

From 22b7c0333d02b8045fde85fc974752f2665ea30d Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 09:38:34 -0700
Subject: [PATCH 070/150] docs - add nvidia nim

---
 docs/my-website/docs/providers/nvidia_nim.md | 103 +++++++++++++++++++
 docs/my-website/sidebars.js                  |   5 +-
 2 files changed, 106 insertions(+), 2 deletions(-)
 create mode 100644 docs/my-website/docs/providers/nvidia_nim.md

diff --git a/docs/my-website/docs/providers/nvidia_nim.md b/docs/my-website/docs/providers/nvidia_nim.md
new file mode 100644
index 000000000..f90450768
--- /dev/null
+++ b/docs/my-website/docs/providers/nvidia_nim.md
@@ -0,0 +1,103 @@
+# Nvidia NIM
+https://docs.api.nvidia.com/nim/reference/
+
+:::tip
+
+**We support ALL Nvidia NIM models, just set `model=nvidia_nim/<any-model-on-nvidia_nim>` as a prefix when sending litellm requests**
+
+:::
+
+## API Key
+```python
+# env variable
+os.environ['NVIDIA_NIM_API_KEY']
+```
+
+## Sample Usage
+```python
+from litellm import completion
+import os
+
+os.environ['NVIDIA_NIM_API_KEY'] = ""
+response = completion(
+    model=model_name,
+    messages=[
+        {
+            "role": "user",
+            "content": "What's the weather like in Boston today in Fahrenheit?",
+        }
+    ],
+    temperature=0.2,        # optional
+    top_p=0.9,              # optional
+    frequency_penalty=0.1,  # optional
+    presence_penalty=0.1,   # optional
+    max_tokens=10,          # optional
+    stop=["\n\n"],          # optional
+)
+print(response)
+```
+
+## Sample Usage - Streaming
+```python
+from litellm import completion
+import os
+
+os.environ['NVIDIA_NIM_API_KEY'] = ""
+response = completion(
+    model=model_name,
+    messages=[
+        {
+            "role": "user",
+            "content": "What's the weather like in Boston today in Fahrenheit?",
+        }
+    ],
+    stream=True,
+    temperature=0.2,        # optional
+    top_p=0.9,              # optional
+    frequency_penalty=0.1,  # optional
+    presence_penalty=0.1,   # optional
+    max_tokens=10,          # optional
+    stop=["\n\n"],          # optional
+)
+
+for chunk in response:
+    print(chunk)
+```
+
+
+## Supported Models - 💥 ALL Nvidia NIM Models Supported!
+We support ALL `nvidia_nim` models, just set `nvidia_nim/` as a prefix when sending completion requests
+
+| Model Name | Function Call |
+|------------|---------------|
+| nvidia/nemotron-4-340b-reward | `completion(model="nvidia_nim/nvidia/nemotron-4-340b-reward", messages)` |
+| 01-ai/yi-large | `completion(model="nvidia_nim/01-ai/yi-large", messages)` |
+| aisingapore/sea-lion-7b-instruct | `completion(model="nvidia_nim/aisingapore/sea-lion-7b-instruct", messages)` |
+| databricks/dbrx-instruct | `completion(model="nvidia_nim/databricks/dbrx-instruct", messages)` |
+| google/gemma-7b | `completion(model="nvidia_nim/google/gemma-7b", messages)` |
+| google/gemma-2b | `completion(model="nvidia_nim/google/gemma-2b", messages)` |
+| google/codegemma-1.1-7b | `completion(model="nvidia_nim/google/codegemma-1.1-7b", messages)` |
+| google/codegemma-7b | `completion(model="nvidia_nim/google/codegemma-7b", messages)` |
+| google/recurrentgemma-2b | `completion(model="nvidia_nim/google/recurrentgemma-2b", messages)` |
+| ibm/granite-34b-code-instruct | `completion(model="nvidia_nim/ibm/granite-34b-code-instruct", messages)` |
+| ibm/granite-8b-code-instruct | `completion(model="nvidia_nim/ibm/granite-8b-code-instruct", messages)` |
+| mediatek/breeze-7b-instruct | `completion(model="nvidia_nim/mediatek/breeze-7b-instruct", messages)` |
+| meta/codellama-70b | `completion(model="nvidia_nim/meta/codellama-70b", messages)` |
+| meta/llama2-70b | `completion(model="nvidia_nim/meta/llama2-70b", messages)` |
+| meta/llama3-8b | `completion(model="nvidia_nim/meta/llama3-8b", messages)` |
+| meta/llama3-70b | `completion(model="nvidia_nim/meta/llama3-70b", messages)` |
+| microsoft/phi-3-medium-4k-instruct | `completion(model="nvidia_nim/microsoft/phi-3-medium-4k-instruct", messages)` |
+| microsoft/phi-3-mini-128k-instruct | `completion(model="nvidia_nim/microsoft/phi-3-mini-128k-instruct", messages)` |
+| microsoft/phi-3-mini-4k-instruct | `completion(model="nvidia_nim/microsoft/phi-3-mini-4k-instruct", messages)` |
+| microsoft/phi-3-small-128k-instruct | `completion(model="nvidia_nim/microsoft/phi-3-small-128k-instruct", messages)` |
+| microsoft/phi-3-small-8k-instruct | `completion(model="nvidia_nim/microsoft/phi-3-small-8k-instruct", messages)` |
+| mistralai/codestral-22b-instruct-v0.1 | `completion(model="nvidia_nim/mistralai/codestral-22b-instruct-v0.1", messages)` |
+| mistralai/mistral-7b-instruct | `completion(model="nvidia_nim/mistralai/mistral-7b-instruct", messages)` |
+| mistralai/mistral-7b-instruct-v0.3 | `completion(model="nvidia_nim/mistralai/mistral-7b-instruct-v0.3", messages)` |
+| mistralai/mixtral-8x7b-instruct | `completion(model="nvidia_nim/mistralai/mixtral-8x7b-instruct", messages)` |
+| mistralai/mixtral-8x22b-instruct | `completion(model="nvidia_nim/mistralai/mixtral-8x22b-instruct", messages)` |
+| mistralai/mistral-large | `completion(model="nvidia_nim/mistralai/mistral-large", messages)` |
+| nvidia/nemotron-4-340b-instruct | `completion(model="nvidia_nim/nvidia/nemotron-4-340b-instruct", messages)` |
+| seallms/seallm-7b-v2.5 | `completion(model="nvidia_nim/seallms/seallm-7b-v2.5", messages)` |
+| snowflake/arctic | `completion(model="nvidia_nim/snowflake/arctic", messages)` |
+| upstage/solar-10.7b-instruct | `completion(model="nvidia_nim/upstage/solar-10.7b-instruct", messages)` |
\ No newline at end of file
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 2673933f4..9835a260b 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -146,13 +146,14 @@ const sidebars = {
         "providers/databricks",
         "providers/watsonx",
         "providers/predibase",
-        "providers/clarifai",
+        "providers/nvidia_nim", 
         "providers/triton-inference-server",
         "providers/ollama", 
         "providers/perplexity", 
         "providers/groq", 
         "providers/deepseek", 
-        "providers/fireworks_ai", 
+        "providers/fireworks_ai",
+        "providers/clarifai", 
         "providers/vllm", 
         "providers/xinference", 
         "providers/cloudflare_workers", 

From 48b8345ae6a84ad96ff94cc4ec6bfff8bdbec51f Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 09:46:42 -0700
Subject: [PATCH 071/150] docs nvidia_nim

---
 docs/my-website/docs/providers/nvidia_nim.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/my-website/docs/providers/nvidia_nim.md b/docs/my-website/docs/providers/nvidia_nim.md
index f90450768..7f895aa33 100644
--- a/docs/my-website/docs/providers/nvidia_nim.md
+++ b/docs/my-website/docs/providers/nvidia_nim.md
@@ -20,7 +20,7 @@ import os
 
 os.environ['NVIDIA_NIM_API_KEY'] = ""
 response = completion(
-    model=model_name,
+    model="nvidia_nim/meta/llama3-70b-instruct",
     messages=[
         {
             "role": "user",
@@ -44,7 +44,7 @@ import os
 
 os.environ['NVIDIA_NIM_API_KEY'] = ""
 response = completion(
-    model=model_name,
+    model="nvidia_nim/meta/llama3-70b-instruct",
     messages=[
         {
             "role": "user",

From 6a6b8613ea86e4ff5849fa79c44f65dbff69355a Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 09:48:49 -0700
Subject: [PATCH 072/150] ci/cd run again

---
 litellm/tests/test_completion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 0c6da360b..30ae1d0ab 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -23,7 +23,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.prompt_templates.factory import anthropic_messages_pt
 
-# litellm.num_retries=3
+# litellm.num_retries = 3
 litellm.cache = None
 litellm.success_callback = []
 user_message = "Write a short poem about the sky"

From 9b47ba72cbb756a6166a2ef4774907c0bdb580d6 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Tue, 25 Jun 2024 10:57:32 -0700
Subject: [PATCH 073/150] feat(router.py): support mock testing content policy
 + context window fallbacks

---
 litellm/proxy/_new_secret_config.yaml | 70 +++++++++++++++++++--------
 litellm/router.py                     | 26 ++++++++++
 2 files changed, 76 insertions(+), 20 deletions(-)

diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 16436c0ef..75545bb60 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,24 +1,54 @@
-model_list: 
-  - model_name: my-fake-model
-    litellm_params:
-      model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
-      api_key: my-fake-key
-      aws_bedrock_runtime_endpoint: http://127.0.0.1:8000
-      mock_response: "Hello world 1"
-    model_info: 
-      max_input_tokens: 0 # trigger context window fallback
-  - model_name: my-fake-model
-    litellm_params:
-      model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
-      api_key: my-fake-key
-      aws_bedrock_runtime_endpoint: http://127.0.0.1:8000
-      mock_response: "Hello world 2"
-    model_info: 
-      max_input_tokens: 0
+# model_list: 
+#   - model_name: my-fake-model
+#     litellm_params:
+#       model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
+#       api_key: my-fake-key
+#       aws_bedrock_runtime_endpoint: http://127.0.0.1:8000
+#       mock_response: "Hello world 1"
+#     model_info: 
+#       max_input_tokens: 0 # trigger context window fallback
+#   - model_name: my-fake-model
+#     litellm_params:
+#       model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
+#       api_key: my-fake-key
+#       aws_bedrock_runtime_endpoint: http://127.0.0.1:8000
+#       mock_response: "Hello world 2"
+#     model_info: 
+#       max_input_tokens: 0
 
-router_settings:
-  enable_pre_call_checks: True
+# router_settings:
+#   enable_pre_call_checks: True
 
 
+# litellm_settings:
+#   failure_callback: ["langfuse"]
+
+model_list:
+  - model_name: summarize
+    litellm_params:
+        model: openai/gpt-4o
+        rpm: 10000      
+        tpm: 12000000
+        api_key: os.environ/OPENAI_API_KEY
+        mock_response: Hello world 1
+
+  - model_name: summarize-l
+    litellm_params:
+        model: claude-3-5-sonnet-20240620
+        rpm: 4000
+        tpm: 400000
+        api_key: os.environ/ANTHROPIC_API_KEY
+        mock_response: Hello world 2
+
 litellm_settings:
-  failure_callback: ["langfuse"]
\ No newline at end of file
+  num_retries: 3
+  request_timeout: 120
+  allowed_fails: 3
+  # fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
+  context_window_fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
+
+
+
+router_settings:
+  routing_strategy: simple-shuffle
+  enable_pre_call_checks: true.
diff --git a/litellm/router.py b/litellm/router.py
index 30bdbcba2..8256a6752 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2117,6 +2117,12 @@ class Router:
         If it fails after num_retries, fall back to another model group
         """
         mock_testing_fallbacks = kwargs.pop("mock_testing_fallbacks", None)
+        mock_testing_context_fallbacks = kwargs.pop(
+            "mock_testing_context_fallbacks", None
+        )
+        mock_testing_content_policy_fallbacks = kwargs.pop(
+            "mock_testing_content_policy_fallbacks", None
+        )
         model_group = kwargs.get("model")
         fallbacks = kwargs.get("fallbacks", self.fallbacks)
         context_window_fallbacks = kwargs.get(
@@ -2130,6 +2136,26 @@ class Router:
                 raise Exception(
                     f"This is a mock exception for model={model_group}, to trigger a fallback. Fallbacks={fallbacks}"
                 )
+            elif (
+                mock_testing_context_fallbacks is not None
+                and mock_testing_context_fallbacks is True
+            ):
+                raise litellm.ContextWindowExceededError(
+                    model=model_group,
+                    llm_provider="",
+                    message=f"This is a mock exception for model={model_group}, to trigger a fallback. \
+                        Context_Window_Fallbacks={context_window_fallbacks}",
+                )
+            elif (
+                mock_testing_content_policy_fallbacks is not None
+                and mock_testing_content_policy_fallbacks is True
+            ):
+                raise litellm.ContentPolicyViolationError(
+                    model=model_group,
+                    llm_provider="",
+                    message=f"This is a mock exception for model={model_group}, to trigger a fallback. \
+                        Context_Policy_Fallbacks={content_policy_fallbacks}",
+                )
 
             response = await self.async_function_with_retries(*args, **kwargs)
             verbose_router_logger.debug(f"Async Response: {response}")

From 81fd42258cba5c8003541bd2dc95e6d713c5354a Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Tue, 25 Jun 2024 11:07:07 -0700
Subject: [PATCH 074/150] docs(reliability.md): add doc on mock testing
 fallbacks

---
 docs/my-website/docs/proxy/reliability.md     | 61 +++++++++++++++++++
 ...odel_prices_and_context_window_backup.json | 10 +--
 2 files changed, 66 insertions(+), 5 deletions(-)

diff --git a/docs/my-website/docs/proxy/reliability.md b/docs/my-website/docs/proxy/reliability.md
index a2d24da69..c07fc3c26 100644
--- a/docs/my-website/docs/proxy/reliability.md
+++ b/docs/my-website/docs/proxy/reliability.md
@@ -431,6 +431,67 @@ litellm_settings:
   content_policy_fallbacks: [{"gpt-3.5-turbo-small": ["claude-opus"]}]
 ```
 
+
+
+### Test Fallbacks! 
+
+Check if your fallbacks are working as expected. 
+
+#### **Regular Fallbacks**
+```bash
+curl -X POST 'http://0.0.0.0:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-D '{
+  "model": "my-bad-model",
+  "messages": [
+    {
+      "role": "user",
+      "content": "ping"
+    }
+  ],
+  "mock_testing_fallbacks": true # 👈 KEY CHANGE
+}
+'
+```
+
+#### **Content Policy Fallbacks**
+```bash
+curl -X POST 'http://0.0.0.0:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-D '{
+  "model": "my-bad-model",
+  "messages": [
+    {
+      "role": "user",
+      "content": "ping"
+    }
+  ],
+  "mock_testing_content_policy_fallbacks": true # 👈 KEY CHANGE
+}
+'
+```
+
+#### **Context Window Fallbacks**
+
+```bash
+curl -X POST 'http://0.0.0.0:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-D '{
+  "model": "my-bad-model",
+  "messages": [
+    {
+      "role": "user",
+      "content": "ping"
+    }
+  ],
+  "mock_testing_context_window_fallbacks": true # 👈 KEY CHANGE
+}
+'
+```
+
 ### EU-Region Filtering (Pre-Call Checks)
 
 **Before call is made** check if a call is within model context window with  **`enable_pre_call_checks: true`**.
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index ef07d87cc..415d220f2 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -887,7 +887,7 @@
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
         "input_cost_per_token": 0.00000005,
-        "output_cost_per_token": 0.00000010,
+        "output_cost_per_token": 0.00000008,
         "litellm_provider": "groq",
         "mode": "chat",
         "supports_function_calling": true
@@ -906,8 +906,8 @@
         "max_tokens": 32768,
         "max_input_tokens": 32768,
         "max_output_tokens": 32768,
-        "input_cost_per_token": 0.00000027,
-        "output_cost_per_token": 0.00000027,
+        "input_cost_per_token": 0.00000024,
+        "output_cost_per_token": 0.00000024,
         "litellm_provider": "groq",
         "mode": "chat",
         "supports_function_calling": true
@@ -916,8 +916,8 @@
         "max_tokens": 8192,
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.00000010,
-        "output_cost_per_token": 0.00000010,
+        "input_cost_per_token": 0.00000007,
+        "output_cost_per_token": 0.00000007,
         "litellm_provider": "groq",
         "mode": "chat",
         "supports_function_calling": true

From d07f8b6d3b9a2f28c9e4e52a3cd6ccbe839fdfb0 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 10:50:47 -0700
Subject: [PATCH 075/150] feat - use n in mock completion

---
 litellm/main.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/litellm/main.py b/litellm/main.py
index 8c531643b..adf53d078 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -429,6 +429,7 @@ def mock_completion(
     model: str,
     messages: List,
     stream: Optional[bool] = False,
+    n: Optional[int] = None,
     mock_response: Union[str, Exception, dict] = "This is a mock request",
     mock_tool_calls: Optional[List] = None,
     logging=None,
@@ -497,8 +498,19 @@ def mock_completion(
                 model_response, mock_response=mock_response, model=model
             )
             return response
-
-        model_response["choices"][0]["message"]["content"] = mock_response
+        if n is None:
+            model_response["choices"][0]["message"]["content"] = mock_response
+        else:
+            _all_choices = []
+            for i in range(n):
+                _choice = litellm.utils.Choices(
+                    index=i,
+                    message=litellm.utils.Message(
+                        content=mock_response, role="assistant"
+                    ),
+                )
+                _all_choices.append(_choice)
+            model_response["choices"] = _all_choices
         model_response["created"] = int(time.time())
         model_response["model"] = model
 
@@ -945,6 +957,7 @@ def completion(
                 model,
                 messages,
                 stream=stream,
+                n=n,
                 mock_response=mock_response,
                 mock_tool_calls=mock_tool_calls,
                 logging=logging,

From 61b9ff9ac22aa6061d550d0943a5727e575bca2c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 10:54:18 -0700
Subject: [PATCH 076/150] test - test_mock_request_n_greater_than_1

---
 litellm/tests/test_mock_request.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/litellm/tests/test_mock_request.py b/litellm/tests/test_mock_request.py
index 7d670feb5..6b58c94b2 100644
--- a/litellm/tests/test_mock_request.py
+++ b/litellm/tests/test_mock_request.py
@@ -58,3 +58,18 @@ async def test_async_mock_streaming_request():
     assert (
         complete_response == "LiteLLM is awesome"
     ), f"Unexpected response got {complete_response}"
+
+
+def test_mock_request_n_greater_than_1():
+    try:
+        model = "gpt-3.5-turbo"
+        messages = [{"role": "user", "content": "Hey, I'm a mock request"}]
+        response = litellm.mock_completion(model=model, messages=messages, n=5)
+        print("response: ", response)
+
+        assert len(response.choices) == 5
+        for choice in response.choices:
+            assert choice.message.content == "This is a mock request"
+
+    except:
+        traceback.print_exc()

From b3ef4755c3f320ee0e0ec510a69816569baf5b06 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 11:14:40 -0700
Subject: [PATCH 077/150] fix using mock completion

---
 litellm/main.py                    |  7 ++++--
 litellm/tests/test_mock_request.py | 19 +++++++++++++++
 litellm/utils.py                   | 39 +++++++++++++++++++++++++-----
 3 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/litellm/main.py b/litellm/main.py
index adf53d078..573b2c19f 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -488,14 +488,17 @@ def mock_completion(
             if kwargs.get("acompletion", False) == True:
                 return CustomStreamWrapper(
                     completion_stream=async_mock_completion_streaming_obj(
-                        model_response, mock_response=mock_response, model=model
+                        model_response, mock_response=mock_response, model=model, n=n
                     ),
                     model=model,
                     custom_llm_provider="openai",
                     logging_obj=logging,
                 )
             response = mock_completion_streaming_obj(
-                model_response, mock_response=mock_response, model=model
+                model_response,
+                mock_response=mock_response,
+                model=model,
+                n=n,
             )
             return response
         if n is None:
diff --git a/litellm/tests/test_mock_request.py b/litellm/tests/test_mock_request.py
index 6b58c94b2..48b054371 100644
--- a/litellm/tests/test_mock_request.py
+++ b/litellm/tests/test_mock_request.py
@@ -73,3 +73,22 @@ def test_mock_request_n_greater_than_1():
 
     except:
         traceback.print_exc()
+
+
+@pytest.mark.asyncio()
+async def test_async_mock_streaming_request_n_greater_than_1():
+    generator = await litellm.acompletion(
+        messages=[{"role": "user", "content": "Why is LiteLLM amazing?"}],
+        mock_response="LiteLLM is awesome",
+        stream=True,
+        model="gpt-3.5-turbo",
+        n=5,
+    )
+    complete_response = ""
+    async for chunk in generator:
+        print(chunk)
+        # complete_response += chunk["choices"][0]["delta"]["content"] or ""
+
+    # assert (
+    #     complete_response == "LiteLLM is awesome"
+    # ), f"Unexpected response got {complete_response}"
diff --git a/litellm/utils.py b/litellm/utils.py
index 7709e8821..854998901 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -9731,18 +9731,45 @@ class TextCompletionStreamWrapper:
             raise StopAsyncIteration
 
 
-def mock_completion_streaming_obj(model_response, mock_response, model):
+def mock_completion_streaming_obj(
+    model_response, mock_response, model, n: Optional[int] = None
+):
     for i in range(0, len(mock_response), 3):
-        completion_obj = {"role": "assistant", "content": mock_response[i : i + 3]}
-        model_response.choices[0].delta = completion_obj
+        completion_obj = Delta(role="assistant", content=mock_response[i : i + 3])
+        if n is None:
+            model_response.choices[0].delta = completion_obj
+        else:
+            _all_choices = []
+            for j in range(n):
+                _streaming_choice = litellm.utils.StreamingChoices(
+                    index=j,
+                    delta=litellm.utils.Delta(
+                        role="assistant", content=mock_response[i : i + 3]
+                    ),
+                )
+                _all_choices.append(_streaming_choice)
+            model_response.choices = _all_choices
         yield model_response
 
 
-async def async_mock_completion_streaming_obj(model_response, mock_response, model):
+async def async_mock_completion_streaming_obj(
+    model_response, mock_response, model, n: Optional[int] = None
+):
     for i in range(0, len(mock_response), 3):
         completion_obj = Delta(role="assistant", content=mock_response[i : i + 3])
-        model_response.choices[0].delta = completion_obj
-        model_response.choices[0].finish_reason = "stop"
+        if n is None:
+            model_response.choices[0].delta = completion_obj
+        else:
+            _all_choices = []
+            for j in range(n):
+                _streaming_choice = litellm.utils.StreamingChoices(
+                    index=j,
+                    delta=litellm.utils.Delta(
+                        role="assistant", content=mock_response[i : i + 3]
+                    ),
+                )
+                _all_choices.append(_streaming_choice)
+            model_response.choices = _all_choices
         yield model_response
 
 

From c45d93b93af1dadd0b3572498d6014598ca7e4dc Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Tue, 25 Jun 2024 11:26:56 -0700
Subject: [PATCH 078/150] fix(router.py): improve error message returned for
 fallbacks

---
 litellm/proxy/_new_secret_config.yaml  |   2 +-
 litellm/router.py                      | 142 ++++++++++++++-----------
 litellm/tests/test_router_fallbacks.py |   4 +-
 3 files changed, 85 insertions(+), 63 deletions(-)

diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 75545bb60..938e74b5e 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -45,7 +45,7 @@ litellm_settings:
   request_timeout: 120
   allowed_fails: 3
   # fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
-  context_window_fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
+  # context_window_fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
 
 
 
diff --git a/litellm/router.py b/litellm/router.py
index 8256a6752..840df5b54 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2175,73 +2175,93 @@ class Router:
                     )
                 ):  # don't retry a malformed request
                     raise e
-                if (
-                    isinstance(e, litellm.ContextWindowExceededError)
-                    and context_window_fallbacks is not None
-                ):
-                    fallback_model_group = None
-                    for (
-                        item
-                    ) in context_window_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
-                        if list(item.keys())[0] == model_group:
-                            fallback_model_group = item[model_group]
-                            break
+                if isinstance(e, litellm.ContextWindowExceededError):
+                    if context_window_fallbacks is not None:
+                        fallback_model_group = None
+                        for (
+                            item
+                        ) in context_window_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
+                            if list(item.keys())[0] == model_group:
+                                fallback_model_group = item[model_group]
+                                break
 
-                    if fallback_model_group is None:
-                        raise original_exception
+                        if fallback_model_group is None:
+                            raise original_exception
 
-                    for mg in fallback_model_group:
-                        """
-                        Iterate through the model groups and try calling that deployment
-                        """
-                        try:
-                            kwargs["model"] = mg
-                            kwargs.setdefault("metadata", {}).update(
-                                {"model_group": mg}
-                            )  # update model_group used, if fallbacks are done
-                            response = await self.async_function_with_retries(
-                                *args, **kwargs
+                        for mg in fallback_model_group:
+                            """
+                            Iterate through the model groups and try calling that deployment
+                            """
+                            try:
+                                kwargs["model"] = mg
+                                kwargs.setdefault("metadata", {}).update(
+                                    {"model_group": mg}
+                                )  # update model_group used, if fallbacks are done
+                                response = await self.async_function_with_retries(
+                                    *args, **kwargs
+                                )
+                                verbose_router_logger.info(
+                                    "Successful fallback b/w models."
+                                )
+                                return response
+                            except Exception as e:
+                                pass
+                    else:
+                        error_message = "model={}. context_window_fallbacks={}. fallbacks={}.\n\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
+                            model_group, context_window_fallbacks, fallbacks
+                        )
+                        verbose_router_logger.info(
+                            msg="Got 'ContextWindowExceededError'. No context_window_fallback set. Defaulting \
+                            to fallbacks, if available.{}".format(
+                                error_message
                             )
-                            verbose_router_logger.info(
-                                "Successful fallback b/w models."
-                            )
-                            return response
-                        except Exception as e:
-                            pass
-                elif (
-                    isinstance(e, litellm.ContentPolicyViolationError)
-                    and content_policy_fallbacks is not None
-                ):
-                    fallback_model_group = None
-                    for (
-                        item
-                    ) in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
-                        if list(item.keys())[0] == model_group:
-                            fallback_model_group = item[model_group]
-                            break
+                        )
 
-                    if fallback_model_group is None:
-                        raise original_exception
+                        e.message += "\n{}".format(error_message)
+                elif isinstance(e, litellm.ContentPolicyViolationError):
+                    if content_policy_fallbacks is not None:
+                        fallback_model_group = None
+                        for (
+                            item
+                        ) in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
+                            if list(item.keys())[0] == model_group:
+                                fallback_model_group = item[model_group]
+                                break
 
-                    for mg in fallback_model_group:
-                        """
-                        Iterate through the model groups and try calling that deployment
-                        """
-                        try:
-                            kwargs["model"] = mg
-                            kwargs.setdefault("metadata", {}).update(
-                                {"model_group": mg}
-                            )  # update model_group used, if fallbacks are done
-                            response = await self.async_function_with_retries(
-                                *args, **kwargs
+                        if fallback_model_group is None:
+                            raise original_exception
+
+                        for mg in fallback_model_group:
+                            """
+                            Iterate through the model groups and try calling that deployment
+                            """
+                            try:
+                                kwargs["model"] = mg
+                                kwargs.setdefault("metadata", {}).update(
+                                    {"model_group": mg}
+                                )  # update model_group used, if fallbacks are done
+                                response = await self.async_function_with_retries(
+                                    *args, **kwargs
+                                )
+                                verbose_router_logger.info(
+                                    "Successful fallback b/w models."
+                                )
+                                return response
+                            except Exception as e:
+                                pass
+                    else:
+                        error_message = "model={}. content_policy_fallback={}. fallbacks={}.\n\nSet 'content_policy_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
+                            model_group, content_policy_fallbacks, fallbacks
+                        )
+                        verbose_router_logger.info(
+                            msg="Got 'ContentPolicyViolationError'. No content_policy_fallback set. Defaulting \
+                            to fallbacks, if available.{}".format(
+                                error_message
                             )
-                            verbose_router_logger.info(
-                                "Successful fallback b/w models."
-                            )
-                            return response
-                        except Exception as e:
-                            pass
-                elif fallbacks is not None:
+                        )
+
+                        e.message += "\n{}".format(error_message)
+                if fallbacks is not None:
                     verbose_router_logger.debug(f"inside model fallbacks: {fallbacks}")
                     generic_fallback_idx: Optional[int] = None
                     ## check for specific model group-specific fallbacks
diff --git a/litellm/tests/test_router_fallbacks.py b/litellm/tests/test_router_fallbacks.py
index 99d2a600c..2c552a64b 100644
--- a/litellm/tests/test_router_fallbacks.py
+++ b/litellm/tests/test_router_fallbacks.py
@@ -1129,7 +1129,9 @@ async def test_router_content_policy_fallbacks(
         mock_response = Exception("content filtering policy")
     else:
         mock_response = litellm.ModelResponse(
-            choices=[litellm.Choices(finish_reason="content_filter")]
+            choices=[litellm.Choices(finish_reason="content_filter")],
+            model="gpt-3.5-turbo",
+            usage=litellm.Usage(prompt_tokens=10, completion_tokens=0, total_tokens=10),
         )
     router = Router(
         model_list=[

From 9909b1d70a39917a53973911107e227388f4940c Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Tue, 25 Jun 2024 11:47:39 -0700
Subject: [PATCH 079/150] fix(utils.py): add coverage for anthropic content
 policy error - vertex ai

---
 litellm/utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/litellm/utils.py b/litellm/utils.py
index 854998901..9f6ebaff0 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -6470,7 +6470,11 @@ def exception_type(
                         ),
                         litellm_debug_info=extra_information,
                     )
-                elif "The response was blocked." in error_str:
+                elif (
+                    "The response was blocked." in error_str
+                    or "Output blocked by content filtering policy"
+                    in error_str  # anthropic on vertex ai
+                ):
                     exception_mapping_worked = True
                     raise ContentPolicyViolationError(
                         message=f"VertexAIException ContentPolicyViolationError - {error_str}",

From d7643eb9f238d0d01da46ad97c20a7df61b79552 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 16:23:56 -0700
Subject: [PATCH 080/150] feat - add secret detection

---
 .../enterprise_hooks/secret_detection.py      | 164 ++++++++++++++++++
 1 file changed, 164 insertions(+)
 create mode 100644 enterprise/enterprise_hooks/secret_detection.py

diff --git a/enterprise/enterprise_hooks/secret_detection.py b/enterprise/enterprise_hooks/secret_detection.py
new file mode 100644
index 000000000..75a578b2c
--- /dev/null
+++ b/enterprise/enterprise_hooks/secret_detection.py
@@ -0,0 +1,164 @@
+# +-------------------------------------------------------------+
+#
+#           Use SecretDetection /moderations for your LLM calls
+#
+# +-------------------------------------------------------------+
+#  Thank you users! We ❤️ you! - Krrish & Ishaan
+
+import sys, os
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+from typing import Optional, Literal, Union
+import litellm, traceback, sys, uuid
+from litellm.caching import DualCache
+from litellm.proxy._types import UserAPIKeyAuth
+from litellm.integrations.custom_logger import CustomLogger
+from fastapi import HTTPException
+from litellm._logging import verbose_proxy_logger
+from litellm.utils import (
+    ModelResponse,
+    EmbeddingResponse,
+    ImageResponse,
+    StreamingChoices,
+)
+from datetime import datetime
+import aiohttp, asyncio
+from litellm._logging import verbose_proxy_logger
+import tempfile
+from litellm._logging import verbose_proxy_logger
+
+
+litellm.set_verbose = True
+
+
+class _ENTERPRISE_SecretDetection(CustomLogger):
+    def __init__(self):
+        pass
+
+    def scan_message_for_secrets(self, message_content: str):
+        from detect_secrets import SecretsCollection
+        from detect_secrets.settings import default_settings
+
+        temp_file = tempfile.NamedTemporaryFile(delete=False)
+        temp_file.write(message_content.encode("utf-8"))
+        temp_file.close()
+
+        secrets = SecretsCollection()
+        with default_settings():
+            secrets.scan_file(temp_file.name)
+
+        os.remove(temp_file.name)
+
+        detected_secrets = []
+        for file in secrets.files:
+            for found_secret in secrets[file]:
+                if found_secret.secret_value is None:
+                    continue
+                detected_secrets.append(
+                    {"type": found_secret.type, "value": found_secret.secret_value}
+                )
+
+        return detected_secrets
+
+    #### CALL HOOKS - proxy only ####
+    def async_pre_call_hook(
+        self,
+        user_api_key_dict: UserAPIKeyAuth,
+        cache: DualCache,
+        data: dict,
+        call_type: str,  # "completion", "embeddings", "image_generation", "moderation"
+    ):
+        from detect_secrets import SecretsCollection
+        from detect_secrets.settings import default_settings
+
+        if "messages" in data and isinstance(data["messages"], list):
+            for message in data["messages"]:
+                if "content" in message and isinstance(message["content"], str):
+                    detected_secrets = self.scan_message_for_secrets(message["content"])
+
+                    for secret in detected_secrets:
+                        message["content"] = message["content"].replace(
+                            secret["value"], "[REDACTED]"
+                        )
+
+                    if len(detected_secrets) > 0:
+                        secret_types = [secret["type"] for secret in detected_secrets]
+                        verbose_proxy_logger.warning(
+                            f"Detected and redacted secrets in message: {secret_types}"
+                        )
+
+        if "prompt" in data:
+            if isinstance(data["prompt"], str):
+                detected_secrets = self.scan_message_for_secrets(data["prompt"])
+                for secret in detected_secrets:
+                    data["prompt"] = data["prompt"].replace(
+                        secret["value"], "[REDACTED]"
+                    )
+                if len(detected_secrets) > 0:
+                    secret_types = [secret["type"] for secret in detected_secrets]
+                    verbose_proxy_logger.warning(
+                        f"Detected and redacted secrets in prompt: {secret_types}"
+                    )
+            elif isinstance(data["prompt"], list):
+                for item in data["prompt"]:
+                    if isinstance(item, str):
+                        detected_secrets = self.scan_message_for_secrets(item)
+                        for secret in detected_secrets:
+                            item = item.replace(secret["value"], "[REDACTED]")
+                        if len(detected_secrets) > 0:
+                            secret_types = [
+                                secret["type"] for secret in detected_secrets
+                            ]
+                            verbose_proxy_logger.warning(
+                                f"Detected and redacted secrets in prompt: {secret_types}"
+                            )
+
+        if "input" in data:
+            if isinstance(data["input"], str):
+                detected_secrets = self.scan_message_for_secrets(data["input"])
+                for secret in detected_secrets:
+                    data["input"] = data["input"].replace(secret["value"], "[REDACTED]")
+                if len(detected_secrets) > 0:
+                    secret_types = [secret["type"] for secret in detected_secrets]
+                    verbose_proxy_logger.warning(
+                        f"Detected and redacted secrets in input: {secret_types}"
+                    )
+            elif isinstance(data["input"], list):
+                for item in data["input"]:
+                    if isinstance(item, str):
+                        detected_secrets = self.scan_message_for_secrets(item)
+                        for secret in detected_secrets:
+                            item = item.replace(secret["value"], "[REDACTED]")
+                        if len(detected_secrets) > 0:
+                            secret_types = [
+                                secret["type"] for secret in detected_secrets
+                            ]
+                            verbose_proxy_logger.warning(
+                                f"Detected and redacted secrets in input: {secret_types}"
+                            )
+
+
+# secretDetect = _ENTERPRISE_SecretDetection()
+
+# from litellm.caching import DualCache
+# print("running hook to detect a secret")
+# test_data = {
+#     "messages": [
+#         {"role": "user", "content": "Hey, how's it going, API_KEY = 'sk_1234567890abcdef'"},
+#         {"role": "assistant", "content": "Hello! I'm doing well. How can I assist you today?"},
+#         {"role": "user", "content": "this is my OPENAI_API_KEY = 'sk_1234567890abcdef'"},
+#          {"role": "user", "content": "i think it is sk-1234567890abcdef"},
+#     ],
+#     "model": "gpt-3.5-turbo",
+# }
+# secretDetect.async_pre_call_hook(
+#     data=test_data,
+#     user_api_key_dict=UserAPIKeyAuth(token="your_api_key"),
+#     cache=DualCache(),
+#     call_type="completion",
+# )
+
+
+# print("finished hook to detect a secret - test data=", test_data)

From 350e87f1d6bc0ccc56ef9b89377279adbc3c4c9e Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 16:25:14 -0700
Subject: [PATCH 081/150] init secret detection callback

---
 litellm/proxy/proxy_server.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 59ad7ba92..c3b855c5f 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1479,6 +1479,21 @@ class ProxyConfig:
 
                                 llama_guard_object = _ENTERPRISE_LlamaGuard()
                                 imported_list.append(llama_guard_object)
+                            elif (
+                                isinstance(callback, str) and callback == "hide_secrets"
+                            ):
+                                from enterprise.enterprise_hooks.secret_detection import (
+                                    _ENTERPRISE_SecretDetection,
+                                )
+
+                                if premium_user != True:
+                                    raise Exception(
+                                        "Trying to use secret hiding"
+                                        + CommonProxyErrors.not_premium_user.value
+                                    )
+
+                                _secret_detection_object = _ENTERPRISE_SecretDetection()
+                                imported_list.append(_secret_detection_object)
                             elif (
                                 isinstance(callback, str)
                                 and callback == "openai_moderations"

From 8a7b16102ce953ae72db270ca7b7a8fdc7e03c1e Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 16:38:47 -0700
Subject: [PATCH 082/150] fix async_pre_call_hook

---
 enterprise/enterprise_hooks/secret_detection.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/enterprise/enterprise_hooks/secret_detection.py b/enterprise/enterprise_hooks/secret_detection.py
index 75a578b2c..ade8b7172 100644
--- a/enterprise/enterprise_hooks/secret_detection.py
+++ b/enterprise/enterprise_hooks/secret_detection.py
@@ -63,7 +63,7 @@ class _ENTERPRISE_SecretDetection(CustomLogger):
         return detected_secrets
 
     #### CALL HOOKS - proxy only ####
-    def async_pre_call_hook(
+    async def async_pre_call_hook(
         self,
         user_api_key_dict: UserAPIKeyAuth,
         cache: DualCache,

From 92eed810779cfb57afec90994f1bb3441b6f26e7 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 17:25:53 -0700
Subject: [PATCH 083/150] clean up secret detection

---
 .../enterprise_hooks/secret_detection.py      | 33 ++++---------------
 requirements.txt                              |  1 +
 2 files changed, 8 insertions(+), 26 deletions(-)

diff --git a/enterprise/enterprise_hooks/secret_detection.py b/enterprise/enterprise_hooks/secret_detection.py
index ade8b7172..ded9f27c1 100644
--- a/enterprise/enterprise_hooks/secret_detection.py
+++ b/enterprise/enterprise_hooks/secret_detection.py
@@ -126,11 +126,14 @@ class _ENTERPRISE_SecretDetection(CustomLogger):
                         f"Detected and redacted secrets in input: {secret_types}"
                     )
             elif isinstance(data["input"], list):
-                for item in data["input"]:
+                _input_in_request = data["input"]
+                for idx, item in enumerate(_input_in_request):
                     if isinstance(item, str):
                         detected_secrets = self.scan_message_for_secrets(item)
                         for secret in detected_secrets:
-                            item = item.replace(secret["value"], "[REDACTED]")
+                            _input_in_request[idx] = item.replace(
+                                secret["value"], "[REDACTED]"
+                            )
                         if len(detected_secrets) > 0:
                             secret_types = [
                                 secret["type"] for secret in detected_secrets
@@ -138,27 +141,5 @@ class _ENTERPRISE_SecretDetection(CustomLogger):
                             verbose_proxy_logger.warning(
                                 f"Detected and redacted secrets in input: {secret_types}"
                             )
-
-
-# secretDetect = _ENTERPRISE_SecretDetection()
-
-# from litellm.caching import DualCache
-# print("running hook to detect a secret")
-# test_data = {
-#     "messages": [
-#         {"role": "user", "content": "Hey, how's it going, API_KEY = 'sk_1234567890abcdef'"},
-#         {"role": "assistant", "content": "Hello! I'm doing well. How can I assist you today?"},
-#         {"role": "user", "content": "this is my OPENAI_API_KEY = 'sk_1234567890abcdef'"},
-#          {"role": "user", "content": "i think it is sk-1234567890abcdef"},
-#     ],
-#     "model": "gpt-3.5-turbo",
-# }
-# secretDetect.async_pre_call_hook(
-#     data=test_data,
-#     user_api_key_dict=UserAPIKeyAuth(token="your_api_key"),
-#     cache=DualCache(),
-#     call_type="completion",
-# )
-
-
-# print("finished hook to detect a secret - test data=", test_data)
+                verbose_proxy_logger.debug("Data after redacting input %s", data)
+        return
diff --git a/requirements.txt b/requirements.txt
index fbf2bfc1d..e40c44e4d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -31,6 +31,7 @@ azure-identity==1.16.1 # for azure content safety
 opentelemetry-api==1.25.0
 opentelemetry-sdk==1.25.0
 opentelemetry-exporter-otlp==1.25.0
+detect-secrets==1.5.0 # Enterprise - secret detection / masking in LLM requests
 
 ### LITELLM PACKAGE DEPENDENCIES
 python-dotenv==1.0.0 # for env 

From 1962a248025889bd598d47786e6b820b59476fd9 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 17:27:02 -0700
Subject: [PATCH 084/150] test secret detection

---
 litellm/proxy/proxy_config.yaml          |   2 +-
 litellm/tests/test_secret_detect_hook.py | 216 +++++++++++++++++++++++
 2 files changed, 217 insertions(+), 1 deletion(-)
 create mode 100644 litellm/tests/test_secret_detect_hook.py

diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index caa6bc13b..0c0365f43 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -26,7 +26,7 @@ general_settings:
 
 litellm_settings:
   success_callback: ["prometheus"]
-  callbacks: ["otel"]
+  callbacks: ["otel", "hide_secrets"]
   failure_callback: ["prometheus"]
   store_audit_logs: true
   redact_messages_in_exceptions: True
diff --git a/litellm/tests/test_secret_detect_hook.py b/litellm/tests/test_secret_detect_hook.py
new file mode 100644
index 000000000..a1bf10eba
--- /dev/null
+++ b/litellm/tests/test_secret_detect_hook.py
@@ -0,0 +1,216 @@
+# What is this?
+## This tests the llm guard integration
+
+import asyncio
+import os
+import random
+
+# What is this?
+## Unit test for presidio pii masking
+import sys
+import time
+import traceback
+from datetime import datetime
+
+from dotenv import load_dotenv
+
+load_dotenv()
+import os
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import pytest
+
+import litellm
+from litellm import Router, mock_completion
+from litellm.caching import DualCache
+from litellm.proxy._types import UserAPIKeyAuth
+from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (
+    _ENTERPRISE_SecretDetection,
+)
+from litellm.proxy.utils import ProxyLogging, hash_token
+
+### UNIT TESTS FOR OpenAI Moderation ###
+
+
+@pytest.mark.asyncio
+async def test_basic_secret_detection_chat():
+    """
+    Tests to see if secret detection hook will mask api keys
+
+
+    It should mask the following API_KEY = 'sk_1234567890abcdef' and  OPENAI_API_KEY = 'sk_1234567890abcdef'
+    """
+    secret_instance = _ENTERPRISE_SecretDetection()
+    _api_key = "sk-12345"
+    _api_key = hash_token("sk-12345")
+    user_api_key_dict = UserAPIKeyAuth(api_key=_api_key)
+    local_cache = DualCache()
+
+    from litellm.proxy.proxy_server import llm_router
+
+    test_data = {
+        "messages": [
+            {
+                "role": "user",
+                "content": "Hey, how's it going, API_KEY = 'sk_1234567890abcdef'",
+            },
+            {
+                "role": "assistant",
+                "content": "Hello! I'm doing well. How can I assist you today?",
+            },
+            {
+                "role": "user",
+                "content": "this is my OPENAI_API_KEY = 'sk_1234567890abcdef'",
+            },
+            {"role": "user", "content": "i think it is +1 412-555-5555"},
+        ],
+        "model": "gpt-3.5-turbo",
+    }
+
+    await secret_instance.async_pre_call_hook(
+        cache=local_cache,
+        data=test_data,
+        user_api_key_dict=user_api_key_dict,
+        call_type="completion",
+    )
+    print(
+        "test data after running pre_call_hook: Expect all API Keys to be masked",
+        test_data,
+    )
+
+    assert test_data == {
+        "messages": [
+            {"role": "user", "content": "Hey, how's it going, API_KEY = '[REDACTED]'"},
+            {
+                "role": "assistant",
+                "content": "Hello! I'm doing well. How can I assist you today?",
+            },
+            {"role": "user", "content": "this is my OPENAI_API_KEY = '[REDACTED]'"},
+            {"role": "user", "content": "i think it is +1 412-555-5555"},
+        ],
+        "model": "gpt-3.5-turbo",
+    }, "Expect all API Keys to be masked"
+
+
+@pytest.mark.asyncio
+async def test_basic_secret_detection_text_completion():
+    """
+    Tests to see if secret detection hook will mask api keys
+
+
+    It should mask the following API_KEY = 'sk_1234567890abcdef' and  OPENAI_API_KEY = 'sk_1234567890abcdef'
+    """
+    secret_instance = _ENTERPRISE_SecretDetection()
+    _api_key = "sk-12345"
+    _api_key = hash_token("sk-12345")
+    user_api_key_dict = UserAPIKeyAuth(api_key=_api_key)
+    local_cache = DualCache()
+
+    from litellm.proxy.proxy_server import llm_router
+
+    test_data = {
+        "prompt": "Hey, how's it going, API_KEY = 'sk_1234567890abcdef', my OPENAI_API_KEY = 'sk_1234567890abcdef' and i want to know what is the weather",
+        "model": "gpt-3.5-turbo",
+    }
+
+    await secret_instance.async_pre_call_hook(
+        cache=local_cache,
+        data=test_data,
+        user_api_key_dict=user_api_key_dict,
+        call_type="completion",
+    )
+
+    test_data == {
+        "prompt": "Hey, how's it going, API_KEY = '[REDACTED]', my OPENAI_API_KEY = '[REDACTED]' and i want to know what is the weather",
+        "model": "gpt-3.5-turbo",
+    }
+    print(
+        "test data after running pre_call_hook: Expect all API Keys to be masked",
+        test_data,
+    )
+
+
+@pytest.mark.asyncio
+async def test_basic_secret_detection_embeddings():
+    """
+    Tests to see if secret detection hook will mask api keys
+
+
+    It should mask the following API_KEY = 'sk_1234567890abcdef' and  OPENAI_API_KEY = 'sk_1234567890abcdef'
+    """
+    secret_instance = _ENTERPRISE_SecretDetection()
+    _api_key = "sk-12345"
+    _api_key = hash_token("sk-12345")
+    user_api_key_dict = UserAPIKeyAuth(api_key=_api_key)
+    local_cache = DualCache()
+
+    from litellm.proxy.proxy_server import llm_router
+
+    test_data = {
+        "input": "Hey, how's it going, API_KEY = 'sk_1234567890abcdef', my OPENAI_API_KEY = 'sk_1234567890abcdef' and i want to know what is the weather",
+        "model": "gpt-3.5-turbo",
+    }
+
+    await secret_instance.async_pre_call_hook(
+        cache=local_cache,
+        data=test_data,
+        user_api_key_dict=user_api_key_dict,
+        call_type="embedding",
+    )
+
+    assert test_data == {
+        "input": "Hey, how's it going, API_KEY = '[REDACTED]', my OPENAI_API_KEY = '[REDACTED]' and i want to know what is the weather",
+        "model": "gpt-3.5-turbo",
+    }
+    print(
+        "test data after running pre_call_hook: Expect all API Keys to be masked",
+        test_data,
+    )
+
+
+@pytest.mark.asyncio
+async def test_basic_secret_detection_embeddings_list():
+    """
+    Tests to see if secret detection hook will mask api keys
+
+
+    It should mask the following API_KEY = 'sk_1234567890abcdef' and  OPENAI_API_KEY = 'sk_1234567890abcdef'
+    """
+    secret_instance = _ENTERPRISE_SecretDetection()
+    _api_key = "sk-12345"
+    _api_key = hash_token("sk-12345")
+    user_api_key_dict = UserAPIKeyAuth(api_key=_api_key)
+    local_cache = DualCache()
+
+    from litellm.proxy.proxy_server import llm_router
+
+    test_data = {
+        "input": [
+            "hey",
+            "how's it going, API_KEY = 'sk_1234567890abcdef'",
+            "my OPENAI_API_KEY = 'sk_1234567890abcdef' and i want to know what is the weather",
+        ],
+        "model": "gpt-3.5-turbo",
+    }
+
+    await secret_instance.async_pre_call_hook(
+        cache=local_cache,
+        data=test_data,
+        user_api_key_dict=user_api_key_dict,
+        call_type="embedding",
+    )
+
+    print(
+        "test data after running pre_call_hook: Expect all API Keys to be masked",
+        test_data,
+    )
+    assert test_data == {
+        "input": [
+            "hey",
+            "how's it going, API_KEY = '[REDACTED]'",
+            "my OPENAI_API_KEY = '[REDACTED]' and i want to know what is the weather",
+        ],
+        "model": "gpt-3.5-turbo",
+    }

From 13f4ecb7ecbb3db7d2a59632af12a1b40f8436cd Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 17:42:54 -0700
Subject: [PATCH 085/150] docs - secret detection

---
 docs/my-website/docs/enterprise.md       |   3 +-
 docs/my-website/docs/proxy/enterprise.md | 101 +++++++++++++++++++++--
 2 files changed, 98 insertions(+), 6 deletions(-)

diff --git a/docs/my-website/docs/enterprise.md b/docs/my-website/docs/enterprise.md
index 2d45ea3ea..875aec57f 100644
--- a/docs/my-website/docs/enterprise.md
+++ b/docs/my-website/docs/enterprise.md
@@ -13,7 +13,8 @@ This covers:
 - ✅ [**Audit Logs with retention policy**](../docs/proxy/enterprise.md#audit-logs)
 - ✅ [**JWT-Auth**](../docs/proxy/token_auth.md)
 - ✅ [**Control available public, private routes**](../docs/proxy/enterprise.md#control-available-public-private-routes)
-- ✅ [**Prompt Injection Detection**](#prompt-injection-detection-lakeraai)
+- ✅ [**Guardrails, Content Moderation, PII Masking, Secret/API Key Masking**](../docs/proxy/enterprise.md#prompt-injection-detection---lakeraai)
+- ✅ [**Prompt Injection Detection**](../docs/proxy/enterprise.md#prompt-injection-detection---lakeraai)
 - ✅ [**Invite Team Members to access `/spend` Routes**](../docs/proxy/cost_tracking#allowing-non-proxy-admins-to-access-spend-endpoints)
 - ✅ **Feature Prioritization**
 - ✅ **Custom Integrations**
diff --git a/docs/my-website/docs/proxy/enterprise.md b/docs/my-website/docs/proxy/enterprise.md
index 40a5261cd..9fff879e5 100644
--- a/docs/my-website/docs/proxy/enterprise.md
+++ b/docs/my-website/docs/proxy/enterprise.md
@@ -15,10 +15,10 @@ Features:
 - ✅ [Audit Logs](#audit-logs)
 - ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags)
 - ✅ [Control available public, private routes](#control-available-public-private-routes)
-- ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests)
-- ✅ [Content Moderation with LLM Guard, LlamaGuard, Google Text Moderations](#content-moderation)
+- ✅ [Content Moderation with LLM Guard, LlamaGuard, Secret Detection, Google Text Moderations](#content-moderation)
 - ✅ [Prompt Injection Detection (with LakeraAI API)](#prompt-injection-detection---lakeraai)
 - ✅ [Custom Branding + Routes on Swagger Docs](#swagger-docs---custom-routes--branding)
+- ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests)
 - ✅ Reject calls from Blocked User list 
 - ✅ Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
 
@@ -495,7 +495,98 @@ curl --request POST \
 
 
 ## Content Moderation
-#### Content Moderation with LLM Guard
+### Content Moderation - Secret Detection
+❓ Use this to REDACT API Keys, Secrets sent in requests to an LLM. 
+
+Example if you want to redact the value of `OPENAI_API_KEY` in the following request
+
+#### Incoming Request 
+
+```json
+{
+    "messages": [
+        {
+            "role": "user",
+            "content": "Hey, how's it going, API_KEY = 'sk_1234567890abcdef'",
+        }
+    ]
+}
+```
+
+#### Request after Moderation
+
+```json
+{
+    "messages": [
+        {
+            "role": "user",
+            "content": "Hey, how's it going, API_KEY = '[REDACTED]'",
+        }
+    ]
+}
+```
+
+**Usage**
+
+**Step 1** Add this to your config.yaml 
+
+```yaml
+litellm_settings:
+  callbacks: ["hide_secrets"]
+```
+
+**Step 2** Run litellm proxy with `--detailed_debug` to see the server logs
+
+```
+litellm --config config.yaml --detailed_debug
+```
+
+**Step 3** Test it with request
+
+Send this request
+```shell
+curl --location 'http://localhost:4000/chat/completions' \
+    --header 'Authorization: Bearer sk-1234' \
+    --header 'Content-Type: application/json' \
+    --data '{
+    "model": "llama3",
+    "messages": [
+        {
+        "role": "user",
+        "content": "what is the value of my open ai key? openai_api_key=sk-1234998222"
+        }
+    ]
+}'
+```
+
+
+Expect to see the following warning on your litellm server logs
+
+```shell
+LiteLLM Proxy:WARNING: secret_detection.py:88 - Detected and redacted secrets in message: ['Secret Keyword']
+```
+
+
+You can also see the raw request sent from litellm to the API Provider
+```json
+POST Request Sent from LiteLLM:
+curl -X POST \
+https://api.groq.com/openai/v1/ \
+-H 'Authorization: Bearer gsk_mySVchjY********************************************' \
+-d {
+  "model": "llama3-8b-8192",
+  "messages": [
+    {
+      "role": "user",
+      "content": "what is the time today, openai_api_key=[REDACTED]"
+    }
+  ],
+  "stream": false,
+  "extra_body": {}
+}
+```
+
+### Content Moderation with LLM Guard
 
 Set the LLM Guard API Base in your environment 
 
@@ -630,7 +721,7 @@ curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
 </TabItem>
 </Tabs>
 
-#### Content Moderation with LlamaGuard 
+### Content Moderation with LlamaGuard 
 
 Currently works with Sagemaker's LlamaGuard endpoint. 
 
@@ -664,7 +755,7 @@ callbacks: ["llamaguard_moderations"]
 
 
 
-#### Content Moderation with Google Text Moderation 
+### Content Moderation with Google Text Moderation 
 
 Requires your GOOGLE_APPLICATION_CREDENTIALS to be set in your .env (same as VertexAI).
 

From 6e10a122b178fec8c05dd2bbbcec39fd6358a5b7 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 17:44:36 -0700
Subject: [PATCH 086/150] fix detect secrets test

---
 .circleci/config.yml             | 3 ++-
 litellm/tests/test_completion.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index fd1b48a9c..5dfeedcaa 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -48,7 +48,8 @@ jobs:
             pip install opentelemetry-sdk==1.25.0
             pip install opentelemetry-exporter-otlp==1.25.0
             pip install openai
-            pip install prisma            
+            pip install prisma   
+            pip install "detect_secrets==1.5.0"         
             pip install "httpx==0.24.1"
             pip install fastapi
             pip install "gunicorn==21.2.0"
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 30ae1d0ab..0c6da360b 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -23,7 +23,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.prompt_templates.factory import anthropic_messages_pt
 
-# litellm.num_retries = 3
+# litellm.num_retries=3
 litellm.cache = None
 litellm.success_callback = []
 user_message = "Write a short poem about the sky"

From adfb9cda7935f1457a548e4d9d0fe7a7a2efe38e Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Tue, 25 Jun 2024 13:47:38 -0700
Subject: [PATCH 087/150] fix(utils.py): predibase exception mapping - map 424
 as a badrequest error

---
 litellm/llms/predibase.py               | 39 +++++++++++++------------
 litellm/proxy/_super_secret_config.yaml |  5 +++-
 litellm/utils.py                        | 12 +++-----
 3 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/litellm/llms/predibase.py b/litellm/llms/predibase.py
index 8ad294457..7a137da70 100644
--- a/litellm/llms/predibase.py
+++ b/litellm/llms/predibase.py
@@ -1,27 +1,26 @@
 # What is this?
 ## Controller file for Predibase Integration - https://predibase.com/
 
-from functools import partial
-import os, types
-import traceback
+import copy
 import json
-from enum import Enum
-import requests, copy  # type: ignore
+import os
 import time
-from typing import Callable, Optional, List, Literal, Union
-from litellm.utils import (
-    ModelResponse,
-    Usage,
-    CustomStreamWrapper,
-    Message,
-    Choices,
-)
-from litellm.litellm_core_utils.core_helpers import map_finish_reason
-import litellm
-from .prompt_templates.factory import prompt_factory, custom_prompt
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
-from .base import BaseLLM
+import traceback
+import types
+from enum import Enum
+from functools import partial
+from typing import Callable, List, Literal, Optional, Union
+
 import httpx  # type: ignore
+import requests  # type: ignore
+
+import litellm
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from litellm.utils import Choices, CustomStreamWrapper, Message, ModelResponse, Usage
+
+from .base import BaseLLM
+from .prompt_templates.factory import custom_prompt, prompt_factory
 
 
 class PredibaseError(Exception):
@@ -496,7 +495,9 @@ class PredibaseChatCompletion(BaseLLM):
         except httpx.HTTPStatusError as e:
             raise PredibaseError(
                 status_code=e.response.status_code,
-                message="HTTPStatusError - {}".format(e.response.text),
+                message="HTTPStatusError - received status_code={}, error_message={}".format(
+                    e.response.status_code, e.response.text
+                ),
             )
         except Exception as e:
             raise PredibaseError(
diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml
index c5f1b4768..94df97c54 100644
--- a/litellm/proxy/_super_secret_config.yaml
+++ b/litellm/proxy/_super_secret_config.yaml
@@ -14,9 +14,10 @@ model_list:
 - model_name: fake-openai-endpoint
   litellm_params:
     model: predibase/llama-3-8b-instruct
-    api_base: "http://0.0.0.0:8000"
+    # api_base: "http://0.0.0.0:8081"
     api_key: os.environ/PREDIBASE_API_KEY
     tenant_id: os.environ/PREDIBASE_TENANT_ID
+    adapter_id: qwoiqjdoqin
     max_retries: 0
     temperature: 0.1
     max_new_tokens: 256
@@ -73,6 +74,8 @@ model_list:
 
 litellm_settings:
   callbacks: ["dynamic_rate_limiter"]
+  # success_callback: ["langfuse"]
+  # failure_callback: ["langfuse"]
   # default_team_settings: 
   #   - team_id: proj1
   #     success_callback: ["langfuse"]
diff --git a/litellm/utils.py b/litellm/utils.py
index 9f6ebaff0..00833003b 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -6157,13 +6157,6 @@ def exception_type(
                         response=original_exception.response,
                         litellm_debug_info=extra_information,
                     )
-                if "Request failed during generation" in error_str:
-                    # this is an internal server error from predibase
-                    raise litellm.InternalServerError(
-                        message=f"PredibaseException - {error_str}",
-                        llm_provider="predibase",
-                        model=model,
-                    )
                 elif hasattr(original_exception, "status_code"):
                     if original_exception.status_code == 500:
                         exception_mapping_worked = True
@@ -6201,7 +6194,10 @@ def exception_type(
                             llm_provider=custom_llm_provider,
                             litellm_debug_info=extra_information,
                         )
-                    elif original_exception.status_code == 422:
+                    elif (
+                        original_exception.status_code == 422
+                        or original_exception.status_code == 424
+                    ):
                         exception_mapping_worked = True
                         raise BadRequestError(
                             message=f"PredibaseException - {original_exception.message}",

From aa9e542d21c8b558d3cf69fbac5a61fc5d9e8d79 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Tue, 25 Jun 2024 16:03:47 -0700
Subject: [PATCH 088/150] fix(predibase.py): support json schema on predibase

---
 litellm/llms/predibase.py               | 59 ++++++++++++++++++++++---
 litellm/proxy/_super_secret_config.yaml | 16 +++----
 litellm/utils.py                        | 10 ++++-
 3 files changed, 67 insertions(+), 18 deletions(-)

diff --git a/litellm/llms/predibase.py b/litellm/llms/predibase.py
index 7a137da70..534f8e26f 100644
--- a/litellm/llms/predibase.py
+++ b/litellm/llms/predibase.py
@@ -15,6 +15,8 @@ import httpx  # type: ignore
 import requests  # type: ignore
 
 import litellm
+import litellm.litellm_core_utils
+import litellm.litellm_core_utils.litellm_logging
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
 from litellm.utils import Choices, CustomStreamWrapper, Message, ModelResponse, Usage
@@ -145,7 +147,49 @@ class PredibaseConfig:
         }
 
     def get_supported_openai_params(self):
-        return ["stream", "temperature", "max_tokens", "top_p", "stop", "n"]
+        return [
+            "stream",
+            "temperature",
+            "max_tokens",
+            "top_p",
+            "stop",
+            "n",
+            "response_format",
+        ]
+
+    def map_openai_params(self, non_default_params: dict, optional_params: dict):
+        for param, value in non_default_params.items():
+            # temperature, top_p, n, stream, stop, max_tokens, n, presence_penalty default to None
+            if param == "temperature":
+                if value == 0.0 or value == 0:
+                    # hugging face exception raised when temp==0
+                    # Failed: Error occurred: HuggingfaceException - Input validation error: `temperature` must be strictly positive
+                    value = 0.01
+                optional_params["temperature"] = value
+            if param == "top_p":
+                optional_params["top_p"] = value
+            if param == "n":
+                optional_params["best_of"] = value
+                optional_params["do_sample"] = (
+                    True  # Need to sample if you want best of for hf inference endpoints
+                )
+            if param == "stream":
+                optional_params["stream"] = value
+            if param == "stop":
+                optional_params["stop"] = value
+            if param == "max_tokens":
+                # HF TGI raises the following exception when max_new_tokens==0
+                # Failed: Error occurred: HuggingfaceException - Input validation error: `max_new_tokens` must be strictly positive
+                if value == 0:
+                    value = 1
+                optional_params["max_new_tokens"] = value
+            if param == "echo":
+                # https://huggingface.co/docs/huggingface_hub/main/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation.decoder_input_details
+                #  Return the decoder input token logprobs and ids. You must set details=True as well for it to be taken into account. Defaults to False
+                optional_params["decoder_input_details"] = True
+            if param == "response_format":
+                optional_params["response_format"] = value
+        return optional_params
 
 
 class PredibaseChatCompletion(BaseLLM):
@@ -224,15 +268,16 @@ class PredibaseChatCompletion(BaseLLM):
                 status_code=response.status_code,
             )
         else:
-            if (
-                not isinstance(completion_response, dict)
-                or "generated_text" not in completion_response
-            ):
+            if not isinstance(completion_response, dict):
                 raise PredibaseError(
                     status_code=422,
-                    message=f"response is not in expected format - {completion_response}",
+                    message=f"'completion_response' is not a dictionary - {completion_response}",
+                )
+            elif "generated_text" not in completion_response:
+                raise PredibaseError(
+                    status_code=422,
+                    message=f"'generated_text' is not a key response dictionary - {completion_response}",
                 )
-
             if len(completion_response["generated_text"]) > 0:
                 model_response["choices"][0]["message"]["content"] = self.output_parser(
                     completion_response["generated_text"]
diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml
index 94df97c54..2060f61ca 100644
--- a/litellm/proxy/_super_secret_config.yaml
+++ b/litellm/proxy/_super_secret_config.yaml
@@ -14,14 +14,10 @@ model_list:
 - model_name: fake-openai-endpoint
   litellm_params:
     model: predibase/llama-3-8b-instruct
-    # api_base: "http://0.0.0.0:8081"
+    api_base: "http://0.0.0.0:8081"
     api_key: os.environ/PREDIBASE_API_KEY
     tenant_id: os.environ/PREDIBASE_TENANT_ID
-    adapter_id: qwoiqjdoqin
-    max_retries: 0
-    temperature: 0.1
     max_new_tokens: 256
-    return_full_text: false
 
 # - litellm_params:
 #     api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
@@ -97,8 +93,8 @@ assistant_settings:
 router_settings:
   enable_pre_call_checks: true
 
-general_settings:
-  alerting: ["slack"]
-  enable_jwt_auth: True
-  litellm_jwtauth:
-    team_id_jwt_field: "client_id" 
\ No newline at end of file
+# general_settings:
+#   # alerting: ["slack"]
+#   enable_jwt_auth: True
+#   litellm_jwtauth:
+#     team_id_jwt_field: "client_id" 
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 00833003b..4465c5b0a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2609,7 +2609,15 @@ def get_optional_params(
             optional_params["top_p"] = top_p
         if stop is not None:
             optional_params["stop_sequences"] = stop
-    elif custom_llm_provider == "huggingface" or custom_llm_provider == "predibase":
+    elif custom_llm_provider == "predibase":
+        supported_params = get_supported_openai_params(
+            model=model, custom_llm_provider=custom_llm_provider
+        )
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = litellm.PredibaseConfig().map_openai_params(
+            non_default_params=non_default_params, optional_params=optional_params
+        )
+    elif custom_llm_provider == "huggingface":
         ## check if unsupported param passed in
         supported_params = get_supported_openai_params(
             model=model, custom_llm_provider=custom_llm_provider

From 85f463dff56dc15a041ea776ac97db32908551ae Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 13:55:54 -0700
Subject: [PATCH 089/150] fix - verify license without api request

---
 litellm/proxy/auth/litellm_license.py | 65 +++++++++++++++++++++++++++
 litellm/proxy/auth/public_key.pem     |  9 ++++
 requirements.txt                      |  1 +
 3 files changed, 75 insertions(+)
 create mode 100644 litellm/proxy/auth/public_key.pem

diff --git a/litellm/proxy/auth/litellm_license.py b/litellm/proxy/auth/litellm_license.py
index ffd9f5273..ec51f904c 100644
--- a/litellm/proxy/auth/litellm_license.py
+++ b/litellm/proxy/auth/litellm_license.py
@@ -1,6 +1,14 @@
 # What is this?
 ## If litellm license in env, checks if it's valid
+import base64
+import json
 import os
+from datetime import datetime
+
+from cryptography.hazmat.primitives import hashes, serialization
+from cryptography.hazmat.primitives.asymmetric import padding, rsa
+
+from litellm._logging import verbose_proxy_logger
 from litellm.llms.custom_httpx.http_handler import HTTPHandler
 
 
@@ -15,6 +23,20 @@ class LicenseCheck:
     def __init__(self) -> None:
         self.license_str = os.getenv("LITELLM_LICENSE", None)
         self.http_handler = HTTPHandler()
+        self.public_key = None
+        self.read_public_key()
+
+    def read_public_key(self):
+        # current dir
+        current_dir = os.path.dirname(os.path.realpath(__file__))
+
+        # check if public_key.pem exists
+        _path_to_public_key = os.path.join(current_dir, "public_key.pem")
+        if os.path.exists(_path_to_public_key):
+            with open(_path_to_public_key, "rb") as key_file:
+                self.public_key = serialization.load_pem_public_key(key_file.read())
+        else:
+            self.public_key = None
 
     def _verify(self, license_str: str) -> bool:
         url = "{}/verify_license/{}".format(self.base_url, license_str)
@@ -35,11 +57,54 @@ class LicenseCheck:
             return False
 
     def is_premium(self) -> bool:
+        """
+        1. verify_license_without_api_request: checks if license was generate using private / public key pair
+        2. _verify: checks if license is valid calling litellm API. This is the old way we were generating/validating license
+        """
         try:
             if self.license_str is None:
                 return False
+            elif self.verify_license_without_api_request(
+                public_key=self.public_key, license_key=self.license_str
+            ):
+                return True
             elif self._verify(license_str=self.license_str):
                 return True
             return False
         except Exception as e:
             return False
+
+    def verify_license_without_api_request(self, public_key, license_key):
+        try:
+            # Decode the license key
+            decoded = base64.b64decode(license_key)
+            message, signature = decoded.split(b".", 1)
+
+            # Verify the signature
+            public_key.verify(
+                signature,
+                message,
+                padding.PSS(
+                    mgf=padding.MGF1(hashes.SHA256()),
+                    salt_length=padding.PSS.MAX_LENGTH,
+                ),
+                hashes.SHA256(),
+            )
+
+            # Decode and parse the data
+            license_data = json.loads(message.decode())
+
+            # debug information provided in license data
+            verbose_proxy_logger.debug("License data: %s", license_data)
+
+            # Check expiration date
+            expiration_date = datetime.strptime(
+                license_data["expiration_date"], "%Y-%m-%d"
+            )
+            if expiration_date < datetime.now():
+                return False, "License has expired"
+
+            return True
+
+        except Exception as e:
+            return False
diff --git a/litellm/proxy/auth/public_key.pem b/litellm/proxy/auth/public_key.pem
new file mode 100644
index 000000000..12a69dde2
--- /dev/null
+++ b/litellm/proxy/auth/public_key.pem
@@ -0,0 +1,9 @@
+-----BEGIN PUBLIC KEY-----
+MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAmfBuNiNzDkNWyce23koQ
+w0vq3bSVHkq7fd9Sw/U1q7FwRwL221daLTyGWssd8xAoQSFXAJKoBwzJQ9wd+o44
+lfL54E3a61nfjZuF+D9ntpXZFfEAxLVtIahDeQjUz4b/EpgciWIJyUfjCJrQo6LY
+eyAZPTGSO8V3zHyaU+CFywq5XCuCnfZqCZeCw051St59A2v8W32mXSCJ+A+x0hYP
+yXJyRRFcefSFG5IBuRHr4Y24Vx7NUIAoco5cnxJho9g2z3J/Hb0GKW+oBNvRVumk
+nuA2Ljmjh4yI0OoTIW8ZWxemvCCJHSjdfKlMyb+QI4fmeiIUZzP5Au+F561Styqq
+YQIDAQAB
+-----END PUBLIC KEY-----
diff --git a/requirements.txt b/requirements.txt
index e40c44e4d..00d3802da 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -32,6 +32,7 @@ opentelemetry-api==1.25.0
 opentelemetry-sdk==1.25.0
 opentelemetry-exporter-otlp==1.25.0
 detect-secrets==1.5.0 # Enterprise - secret detection / masking in LLM requests
+cryptography==42.0.7
 
 ### LITELLM PACKAGE DEPENDENCIES
 python-dotenv==1.0.0 # for env 

From 882b6dcab4bed2a1e3d79dd9d3c405e0bb240b15 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 16:28:47 -0700
Subject: [PATCH 090/150] fix only use crypto imports when needed

---
 litellm/proxy/auth/litellm_license.py | 31 ++++++++++++++++-----------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/litellm/proxy/auth/litellm_license.py b/litellm/proxy/auth/litellm_license.py
index ec51f904c..0310dcaf5 100644
--- a/litellm/proxy/auth/litellm_license.py
+++ b/litellm/proxy/auth/litellm_license.py
@@ -5,9 +5,6 @@ import json
 import os
 from datetime import datetime
 
-from cryptography.hazmat.primitives import hashes, serialization
-from cryptography.hazmat.primitives.asymmetric import padding, rsa
-
 from litellm._logging import verbose_proxy_logger
 from litellm.llms.custom_httpx.http_handler import HTTPHandler
 
@@ -27,16 +24,22 @@ class LicenseCheck:
         self.read_public_key()
 
     def read_public_key(self):
-        # current dir
-        current_dir = os.path.dirname(os.path.realpath(__file__))
+        try:
+            from cryptography.hazmat.primitives import hashes, serialization
+            from cryptography.hazmat.primitives.asymmetric import padding, rsa
 
-        # check if public_key.pem exists
-        _path_to_public_key = os.path.join(current_dir, "public_key.pem")
-        if os.path.exists(_path_to_public_key):
-            with open(_path_to_public_key, "rb") as key_file:
-                self.public_key = serialization.load_pem_public_key(key_file.read())
-        else:
-            self.public_key = None
+            # current dir
+            current_dir = os.path.dirname(os.path.realpath(__file__))
+
+            # check if public_key.pem exists
+            _path_to_public_key = os.path.join(current_dir, "public_key.pem")
+            if os.path.exists(_path_to_public_key):
+                with open(_path_to_public_key, "rb") as key_file:
+                    self.public_key = serialization.load_pem_public_key(key_file.read())
+            else:
+                self.public_key = None
+        except Exception as e:
+            verbose_proxy_logger.error(f"Error reading public key: {str(e)}")
 
     def _verify(self, license_str: str) -> bool:
         url = "{}/verify_license/{}".format(self.base_url, license_str)
@@ -76,6 +79,9 @@ class LicenseCheck:
 
     def verify_license_without_api_request(self, public_key, license_key):
         try:
+            from cryptography.hazmat.primitives import hashes, serialization
+            from cryptography.hazmat.primitives.asymmetric import padding, rsa
+
             # Decode the license key
             decoded = base64.b64decode(license_key)
             message, signature = decoded.split(b".", 1)
@@ -107,4 +113,5 @@ class LicenseCheck:
             return True
 
         except Exception as e:
+            verbose_proxy_logger.error(str(e))
             return False

From 2b4628fc07f3abb7c37085ea88102f6cd5b54128 Mon Sep 17 00:00:00 2001
From: Steven Osborn <steven@lolsborn.com>
Date: Tue, 25 Jun 2024 09:03:05 -0700
Subject: [PATCH 091/150] create litellm user to fix issue in k8s where prisma
 fails due to user nobody without home directory

---
 Dockerfile.database | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/Dockerfile.database b/Dockerfile.database
index 22084bab8..1901200d5 100644
--- a/Dockerfile.database
+++ b/Dockerfile.database
@@ -9,6 +9,27 @@ FROM $LITELLM_BUILD_IMAGE as builder
 # Set the working directory to /app
 WORKDIR /app
 
+ARG LITELLM_USER=litellm LITELLM_UID=1729
+ARG LITELLM_GROUP=litellm LITELLM_GID=1729
+
+RUN groupadd \
+	--gid ${LITELLM_GID} \
+	${LITELLM_GROUP} \
+	&& useradd \
+	--create-home \
+	--shell /bin/sh \
+	--gid ${LITELLM_GID} \
+	--uid ${LITELLM_UID} \
+	${LITELLM_USER}
+
+# Allows user to update python install.
+# This is necessary for prisma.
+RUN chown -R ${LITELLM_USER}:${LITELLM_GROUP} /usr/local/lib/python3.11
+
+# Set the HOME var forcefully because of prisma.
+ENV HOME=/home/${LITELLM_USER}
+USER ${LITELLM_USER}
+
 # Install build dependencies
 RUN apt-get clean && apt-get update && \
     apt-get install -y gcc python3-dev && \

From d8b548b1ceb8fdbf53ea5e65f235413212d84ec5 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 18:13:31 -0700
Subject: [PATCH 092/150] =?UTF-8?q?bump:=20version=201.40.26=20=E2=86=92?=
 =?UTF-8?q?=201.40.27?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6b4884b5b..321f44b23 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.40.26"
+version = "1.40.27"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -90,7 +90,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.40.26"
+version = "1.40.27"
 version_files = [
     "pyproject.toml:^version"
 ]

From b890229cff5b5399d57cc3a71ee4edf33d7b1e0f Mon Sep 17 00:00:00 2001
From: Krish Dholakia <krrishdholakia@gmail.com>
Date: Tue, 25 Jun 2024 18:19:24 -0700
Subject: [PATCH 093/150] Revert "Create litellm user to fix issue with prisma
 in k8s "

---
 Dockerfile.database | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/Dockerfile.database b/Dockerfile.database
index 1901200d5..22084bab8 100644
--- a/Dockerfile.database
+++ b/Dockerfile.database
@@ -9,27 +9,6 @@ FROM $LITELLM_BUILD_IMAGE as builder
 # Set the working directory to /app
 WORKDIR /app
 
-ARG LITELLM_USER=litellm LITELLM_UID=1729
-ARG LITELLM_GROUP=litellm LITELLM_GID=1729
-
-RUN groupadd \
-	--gid ${LITELLM_GID} \
-	${LITELLM_GROUP} \
-	&& useradd \
-	--create-home \
-	--shell /bin/sh \
-	--gid ${LITELLM_GID} \
-	--uid ${LITELLM_UID} \
-	${LITELLM_USER}
-
-# Allows user to update python install.
-# This is necessary for prisma.
-RUN chown -R ${LITELLM_USER}:${LITELLM_GROUP} /usr/local/lib/python3.11
-
-# Set the HOME var forcefully because of prisma.
-ENV HOME=/home/${LITELLM_USER}
-USER ${LITELLM_USER}
-
 # Install build dependencies
 RUN apt-get clean && apt-get update && \
     apt-get install -y gcc python3-dev && \

From b8316d8a8d6ec059771fd5a103320261260c4602 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Tue, 25 Jun 2024 16:51:55 -0700
Subject: [PATCH 094/150] fix(router.py): set `cooldown_time:` per model

---
 litellm/integrations/custom_logger.py         | 12 ++--
 litellm/litellm_core_utils/litellm_logging.py |  3 +-
 litellm/main.py                               |  6 ++
 litellm/router.py                             |  4 +-
 litellm/tests/test_router_cooldowns.py        | 56 ++++++++++++++++++-
 litellm/utils.py                              |  2 +
 6 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py
index 5a6282994..da9826b9b 100644
--- a/litellm/integrations/custom_logger.py
+++ b/litellm/integrations/custom_logger.py
@@ -1,11 +1,13 @@
 #### What this does ####
 #    On success, logs events to Promptlayer
-import dotenv, os
-
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.caching import DualCache
-from typing import Literal, Union, Optional
+import os
 import traceback
+from typing import Literal, Optional, Union
+
+import dotenv
+
+from litellm.caching import DualCache
+from litellm.proxy._types import UserAPIKeyAuth
 
 
 class CustomLogger:  # https://docs.litellm.ai/docs/observability/custom_callback#callback-class
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index aa22b5153..add281e43 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -19,8 +19,7 @@ from litellm import (
     turn_off_message_logging,
     verbose_logger,
 )
-
-from litellm.caching import InMemoryCache, S3Cache, DualCache
+from litellm.caching import DualCache, InMemoryCache, S3Cache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.redact_messages import (
     redact_message_input_output_from_logging,
diff --git a/litellm/main.py b/litellm/main.py
index 573b2c19f..b7aa47ab7 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -650,6 +650,7 @@ def completion(
     headers = kwargs.get("headers", None) or extra_headers
     num_retries = kwargs.get("num_retries", None)  ## deprecated
     max_retries = kwargs.get("max_retries", None)
+    cooldown_time = kwargs.get("cooldown_time", None)
     context_window_fallback_dict = kwargs.get("context_window_fallback_dict", None)
     organization = kwargs.get("organization", None)
     ### CUSTOM MODEL COST ###
@@ -763,6 +764,7 @@ def completion(
         "allowed_model_region",
         "model_config",
         "fastest_response",
+        "cooldown_time",
     ]
 
     default_params = openai_params + litellm_params
@@ -947,6 +949,7 @@ def completion(
             input_cost_per_token=input_cost_per_token,
             output_cost_per_second=output_cost_per_second,
             output_cost_per_token=output_cost_per_token,
+            cooldown_time=cooldown_time,
         )
         logging.update_environment_variables(
             model=model,
@@ -3030,6 +3033,7 @@ def embedding(
     client = kwargs.pop("client", None)
     rpm = kwargs.pop("rpm", None)
     tpm = kwargs.pop("tpm", None)
+    cooldown_time = kwargs.get("cooldown_time", None)
     max_parallel_requests = kwargs.pop("max_parallel_requests", None)
     model_info = kwargs.get("model_info", None)
     metadata = kwargs.get("metadata", None)
@@ -3105,6 +3109,7 @@ def embedding(
         "region_name",
         "allowed_model_region",
         "model_config",
+        "cooldown_time",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {
@@ -3165,6 +3170,7 @@ def embedding(
                 "aembedding": aembedding,
                 "preset_cache_key": None,
                 "stream_response": {},
+                "cooldown_time": cooldown_time,
             },
         )
         if azure == True or custom_llm_provider == "azure":
diff --git a/litellm/router.py b/litellm/router.py
index 840df5b54..e2f7ce8b2 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2816,7 +2816,9 @@ class Router:
 
             exception_response = getattr(exception, "response", {})
             exception_headers = getattr(exception_response, "headers", None)
-            _time_to_cooldown = self.cooldown_time
+            _time_to_cooldown = kwargs.get("litellm_params", {}).get(
+                "cooldown_time", self.cooldown_time
+            )
 
             if exception_headers is not None:
 
diff --git a/litellm/tests/test_router_cooldowns.py b/litellm/tests/test_router_cooldowns.py
index 35095bb2c..3eef6e542 100644
--- a/litellm/tests/test_router_cooldowns.py
+++ b/litellm/tests/test_router_cooldowns.py
@@ -1,18 +1,26 @@
 #### What this tests ####
 #    This tests calling router with fallback models
 
-import sys, os, time
-import traceback, asyncio
+import asyncio
+import os
+import sys
+import time
+import traceback
+
 import pytest
 
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
 
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import httpx
+import openai
+
 import litellm
 from litellm import Router
 from litellm.integrations.custom_logger import CustomLogger
-import openai, httpx
 
 
 @pytest.mark.asyncio
@@ -62,3 +70,45 @@ async def test_cooldown_badrequest_error():
     assert response is not None
 
     print(response)
+
+
+@pytest.mark.asyncio
+async def test_dynamic_cooldowns():
+    """
+    Assert kwargs for completion/embedding have 'cooldown_time' as a litellm_param
+    """
+    # litellm.set_verbose = True
+    tmp_mock = MagicMock()
+
+    litellm.failure_callback = [tmp_mock]
+
+    router = Router(
+        model_list=[
+            {
+                "model_name": "my-fake-model",
+                "litellm_params": {
+                    "model": "openai/gpt-1",
+                    "api_key": "my-key",
+                    "mock_response": Exception("this is an error"),
+                },
+            }
+        ],
+        cooldown_time=60,
+    )
+
+    try:
+        _ = router.completion(
+            model="my-fake-model",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+            cooldown_time=0,
+            num_retries=0,
+        )
+    except Exception:
+        pass
+
+    tmp_mock.assert_called_once()
+
+    print(tmp_mock.call_count)
+
+    assert "cooldown_time" in tmp_mock.call_args[0][0]["litellm_params"]
+    assert tmp_mock.call_args[0][0]["litellm_params"]["cooldown_time"] == 0
diff --git a/litellm/utils.py b/litellm/utils.py
index 4465c5b0a..beae7ba4a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2017,6 +2017,7 @@ def get_litellm_params(
     input_cost_per_token=None,
     output_cost_per_token=None,
     output_cost_per_second=None,
+    cooldown_time=None,
 ):
     litellm_params = {
         "acompletion": acompletion,
@@ -2039,6 +2040,7 @@ def get_litellm_params(
         "input_cost_per_second": input_cost_per_second,
         "output_cost_per_token": output_cost_per_token,
         "output_cost_per_second": output_cost_per_second,
+        "cooldown_time": cooldown_time,
     }
 
     return litellm_params

From 5e96e035b26ae30463043cf5689ceaf0c0988aa0 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Tue, 25 Jun 2024 17:01:58 -0700
Subject: [PATCH 095/150] docs(routing.md): add dynamic cooldowns to docs

---
 docs/my-website/docs/proxy/reliability.md |  1 +
 docs/my-website/docs/routing.md           | 35 ++++++++++++++++++++++-
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/docs/my-website/docs/proxy/reliability.md b/docs/my-website/docs/proxy/reliability.md
index c07fc3c26..9228071b0 100644
--- a/docs/my-website/docs/proxy/reliability.md
+++ b/docs/my-website/docs/proxy/reliability.md
@@ -272,6 +272,7 @@ litellm_settings:
   fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo"]}] # fallback to gpt-3.5-turbo if call fails num_retries 
   context_window_fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}] # fallback to gpt-3.5-turbo-16k if context window error
   allowed_fails: 3 # cooldown model if it fails > 1 call in a minute. 
+  cooldown_time: 30 # how long to cooldown model if fails/min > allowed_fails
 ```
 ### Context Window Fallbacks (Pre-Call Checks + Fallbacks)
 
diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md
index de0a4a796..240e6c8e0 100644
--- a/docs/my-website/docs/routing.md
+++ b/docs/my-website/docs/routing.md
@@ -762,6 +762,9 @@ asyncio.run(router_acompletion())
 
 Set the limit for how many calls a model is allowed to fail in a minute, before being cooled down for a minute. 
 
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
 ```python
 from litellm import Router
 
@@ -779,9 +782,39 @@ messages = [{"content": user_message, "role": "user"}]
 response = router.completion(model="gpt-3.5-turbo", messages=messages)
 
 print(f"response: {response}")
-
 ```
 
+</TabItem>
+<TabItem value="proxy" label="PROXY">
+
+**Set Global Value**
+
+```yaml
+router_settings:
+	allowed_fails: 3 # cooldown model if it fails > 1 call in a minute. 
+  	cooldown_time: 30 # (in seconds) how long to cooldown model if fails/min > allowed_fails
+```
+
+Defaults:
+- allowed_fails: 0
+- cooldown_time: 60s 
+
+**Set Per Model**
+
+```yaml
+model_list:
+- model_name: fake-openai-endpoint
+  litellm_params:
+    model: predibase/llama-3-8b-instruct
+    api_key: os.environ/PREDIBASE_API_KEY
+    tenant_id: os.environ/PREDIBASE_TENANT_ID
+    max_new_tokens: 256
+    cooldown_time: 0 # 👈 KEY CHANGE
+```
+
+</TabItem>
+</Tabs>
+
 ### Retries
 
 For both async + sync functions, we support retrying failed requests. 

From e1c54c4c2b5d8c097ddf185f704276269576b777 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 25 Jun 2024 18:21:57 -0700
Subject: [PATCH 096/150] run again

---
 litellm/tests/test_completion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 0c6da360b..30ae1d0ab 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -23,7 +23,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.prompt_templates.factory import anthropic_messages_pt
 
-# litellm.num_retries=3
+# litellm.num_retries = 3
 litellm.cache = None
 litellm.success_callback = []
 user_message = "Write a short poem about the sky"

From f225174024686144652c3e0037bac0411d925dae Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Tue, 25 Jun 2024 18:26:16 -0700
Subject: [PATCH 097/150] docs(function_call.md): cleanup

---
 docs/my-website/docs/completion/function_call.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/my-website/docs/completion/function_call.md b/docs/my-website/docs/completion/function_call.md
index 5daccf723..514e8cda1 100644
--- a/docs/my-website/docs/completion/function_call.md
+++ b/docs/my-website/docs/completion/function_call.md
@@ -502,10 +502,10 @@ response = completion(model="gpt-3.5-turbo-0613", messages=messages, functions=f
 print(response)
 ```
 
-## Function calling for Non-OpenAI LLMs
+## Function calling for Models w/out function-calling support
 
 ### Adding Function to prompt
-For Non OpenAI LLMs LiteLLM allows you to add the function to the prompt set: `litellm.add_function_to_prompt = True`
+For Models/providers without function calling support, LiteLLM allows you to add the function to the prompt set: `litellm.add_function_to_prompt = True`
 
 #### Usage
 ```python

From 4a537544360fe20e8f19806987bf1ea87bc3e016 Mon Sep 17 00:00:00 2001
From: Paul Gauthier <paul@paulg.com>
Date: Tue, 25 Jun 2024 07:35:49 -0700
Subject: [PATCH 098/150] Added openrouter/anthropic/claude-3.5-sonnet to model
 json

---
 model_prices_and_context_window.json | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 415d220f2..e209e096a 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -2073,6 +2073,18 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
+    "openrouter/anthropic/claude-3.5-sonnet": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159
+    },
     "openrouter/anthropic/claude-3-sonnet": {
         "max_tokens": 200000,
         "input_cost_per_token": 0.000003,

From 30c51c489939fce725ccdc3dd3ce637e133235b2 Mon Sep 17 00:00:00 2001
From: Paul Gauthier <paul@paulg.com>
Date: Tue, 25 Jun 2024 07:43:58 -0700
Subject: [PATCH 099/150] Added openrouter/anthropic/claude-3-haiku-20240307

---
 model_prices_and_context_window.json | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index e209e096a..d7a7a7dc8 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -2073,6 +2073,18 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
+    "openrouter/anthropic/claude-3-haiku-20240307": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000025,
+        "output_cost_per_token": 0.00000125,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 264
+    },
     "openrouter/anthropic/claude-3.5-sonnet": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,

From 62b64301258c88aa5f2c317a79c8680dc972ac8e Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 26 Jun 2024 08:09:14 -0700
Subject: [PATCH 100/150] docs(reliable_completions.md): improve headers for
 easier searching

---
 .../docs/completion/reliable_completions.md        | 14 ++++++++++----
 litellm/llms/azure.py                              |  2 +-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/docs/my-website/docs/completion/reliable_completions.md b/docs/my-website/docs/completion/reliable_completions.md
index 2656f9a4f..94102e194 100644
--- a/docs/my-website/docs/completion/reliable_completions.md
+++ b/docs/my-website/docs/completion/reliable_completions.md
@@ -31,9 +31,15 @@ response = completion(
         )
 ```
 
-## Fallbacks 
+## Fallbacks (SDK)
 
-### Context Window Fallbacks
+:::info
+
+[See how to do on PROXY](../proxy/reliability.md)
+
+:::
+
+### Context Window Fallbacks (SDK)
 ```python 
 from litellm import completion
 
@@ -43,7 +49,7 @@ messages = [{"content": "how does a court case get to the Supreme Court?" * 500,
 completion(model="gpt-3.5-turbo", messages=messages, context_window_fallback_dict=ctx_window_fallback_dict)
 ```
 
-### Fallbacks - Switch Models/API Keys/API Bases
+### Fallbacks - Switch Models/API Keys/API Bases (SDK)
 
 LLM APIs can be unstable, completion() with fallbacks ensures you'll always get a response from your calls
 
@@ -69,7 +75,7 @@ response = completion(model="azure/gpt-4", messages=messages, api_key=api_key,
 
 [Check out this section for implementation details](#fallbacks-1)
 
-## Implementation Details 
+## Implementation Details (SDK)
 
 ### Fallbacks
 #### Output from calls
diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index c292c3423..b763a7c95 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -902,7 +902,7 @@ class AzureChatCompletion(BaseLLM):
                 },
             )
 
-            if aembedding == True:
+            if aembedding is True:
                 response = self.aembedding(
                     data=data,
                     input=input,

From 85351078fa7fff21473db571d43b61efc2f848f5 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 06:29:21 -0700
Subject: [PATCH 101/150] feat - add fireworks ai config for param mapping

---
 litellm/llms/fireworks_ai.py                  | 107 ++++++++++++++++++
 ...odel_prices_and_context_window_backup.json |  24 ++++
 2 files changed, 131 insertions(+)
 create mode 100644 litellm/llms/fireworks_ai.py

diff --git a/litellm/llms/fireworks_ai.py b/litellm/llms/fireworks_ai.py
new file mode 100644
index 000000000..18309f4c2
--- /dev/null
+++ b/litellm/llms/fireworks_ai.py
@@ -0,0 +1,107 @@
+import types
+from typing import Literal, Optional, Union
+
+import litellm
+
+
+class FireworksAIConfig:
+    """
+    Reference: https://docs.fireworks.ai/api-reference/post-chatcompletions
+
+    The class `FireworksAIConfig` provides configuration for the Fireworks's Chat Completions API interface. Below are the parameters:
+    """
+
+    tools: Optional[list] = None
+    tool_choice: Optional[Union[str, dict]] = None
+    max_tokens: Optional[int] = None
+    temperature: Optional[int] = None
+    top_p: Optional[int] = None
+    top_k: Optional[int] = None
+    frequency_penalty: Optional[int] = None
+    presence_penalty: Optional[int] = None
+    n: Optional[int] = None
+    stop: Optional[Union[str, list]] = None
+    response_format: Optional[dict] = None
+    user: Optional[str] = None
+
+    # Non OpenAI parameters - Fireworks AI only params
+    prompt_truncate_length: Optional[int] = None
+    context_length_exceeded_behavior: Optional[Literal["error", "truncate"]] = None
+
+    def __init__(
+        self,
+        tools: Optional[list] = None,
+        tool_choice: Optional[Union[str, dict]] = None,
+        max_tokens: Optional[int] = None,
+        temperature: Optional[int] = None,
+        top_p: Optional[int] = None,
+        top_k: Optional[int] = None,
+        frequency_penalty: Optional[int] = None,
+        presence_penalty: Optional[int] = None,
+        n: Optional[int] = None,
+        stop: Optional[Union[str, list]] = None,
+        response_format: Optional[dict] = None,
+        user: Optional[str] = None,
+        prompt_truncate_length: Optional[int] = None,
+        context_length_exceeded_behavior: Optional[Literal["error", "truncate"]] = None,
+    ) -> None:
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
+
+    def get_supported_openai_params(self):
+        return [
+            "stream",
+            "tools",
+            "tool_choice",
+            "max_tokens",
+            "temperature",
+            "top_p",
+            "top_k",
+            "frequency_penalty",
+            "presence_penalty",
+            "n",
+            "stop",
+            "response_format",
+            "user",
+            "prompt_truncate_length",
+            "context_length_exceeded_behavior",
+        ]
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        supported_openai_params = self.get_supported_openai_params()
+        for param, value in non_default_params.items():
+            if param == "tool_choice":
+                if value == "required":
+                    # relevant issue: https://github.com/BerriAI/litellm/issues/4416
+                    optional_params["tools"] = "any"
+
+            if param in supported_openai_params:
+                if value is not None:
+                    optional_params[param] = value
+        return optional_params
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 415d220f2..d7a7a7dc8 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -2073,6 +2073,30 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
+    "openrouter/anthropic/claude-3-haiku-20240307": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000025,
+        "output_cost_per_token": 0.00000125,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 264
+    },
+    "openrouter/anthropic/claude-3.5-sonnet": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159
+    },
     "openrouter/anthropic/claude-3-sonnet": {
         "max_tokens": 200000,
         "input_cost_per_token": 0.000003,

From 0548926e4d1b85c7d9b960d995ea9513662e8aaa Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 06:40:44 -0700
Subject: [PATCH 102/150] fix fireworks ai config

---
 litellm/llms/fireworks_ai.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/litellm/llms/fireworks_ai.py b/litellm/llms/fireworks_ai.py
index 18309f4c2..7c2d3b72a 100644
--- a/litellm/llms/fireworks_ai.py
+++ b/litellm/llms/fireworks_ai.py
@@ -92,16 +92,15 @@ class FireworksAIConfig:
         non_default_params: dict,
         optional_params: dict,
         model: str,
-        drop_params: bool,
     ) -> dict:
         supported_openai_params = self.get_supported_openai_params()
         for param, value in non_default_params.items():
             if param == "tool_choice":
                 if value == "required":
                     # relevant issue: https://github.com/BerriAI/litellm/issues/4416
-                    optional_params["tools"] = "any"
+                    optional_params["tool_choice"] = "any"
 
-            if param in supported_openai_params:
+            elif param in supported_openai_params:
                 if value is not None:
                     optional_params[param] = value
         return optional_params

From 8ea269c3c3a4a6cfed88d891974807881785722d Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 06:43:18 -0700
Subject: [PATCH 103/150] add fireworks ai param mapping

---
 litellm/__init__.py |  1 +
 litellm/utils.py    | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/litellm/__init__.py b/litellm/__init__.py
index 08ee84aaa..cee80a32d 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -817,6 +817,7 @@ from .llms.openai import (
     AzureAIStudioConfig,
 )
 from .llms.nvidia_nim import NvidiaNimConfig
+from .llms.fireworks_ai import FireworksAIConfig
 from .llms.text_completion_codestral import MistralTextCompletionConfig
 from .llms.azure import (
     AzureOpenAIConfig,
diff --git a/litellm/utils.py b/litellm/utils.py
index beae7ba4a..a33a160e4 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3079,6 +3079,16 @@ def get_optional_params(
         optional_params = litellm.NvidiaNimConfig().map_openai_params(
             non_default_params=non_default_params, optional_params=optional_params
         )
+    elif custom_llm_provider == "fireworks_ai":
+        supported_params = get_supported_openai_params(
+            model=model, custom_llm_provider=custom_llm_provider
+        )
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = litellm.FireworksAIConfig().map_openai_params(
+            non_default_params=non_default_params,
+            optional_params=optional_params,
+            model=model,
+        )
     elif custom_llm_provider == "groq":
         supported_params = get_supported_openai_params(
             model=model, custom_llm_provider=custom_llm_provider
@@ -3645,6 +3655,8 @@ def get_supported_openai_params(
         return litellm.OllamaChatConfig().get_supported_openai_params()
     elif custom_llm_provider == "anthropic":
         return litellm.AnthropicConfig().get_supported_openai_params()
+    elif custom_llm_provider == "fireworks_ai":
+        return litellm.FireworksAIConfig().get_supported_openai_params()
     elif custom_llm_provider == "nvidia_nim":
         return litellm.NvidiaNimConfig().get_supported_openai_params()
     elif custom_llm_provider == "groq":

From 181986a684e5e10a64140e9cd000dbb572778f20 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 06:45:29 -0700
Subject: [PATCH 104/150] test fireworks ai tool calling

---
 litellm/tests/test_completion.py | 38 ++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 30ae1d0ab..a3b0e6ea2 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -1222,6 +1222,44 @@ def test_completion_fireworks_ai():
         pytest.fail(f"Error occurred: {e}")
 
 
+def test_fireworks_ai_tool_calling():
+    litellm.set_verbose = True
+    model_name = "fireworks_ai/accounts/fireworks/models/firefunction-v2"
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_current_weather",
+                "description": "Get the current weather in a given location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city and state, e.g. San Francisco, CA",
+                        },
+                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                    },
+                    "required": ["location"],
+                },
+            },
+        }
+    ]
+    messages = [
+        {
+            "role": "user",
+            "content": "What's the weather like in Boston today in Fahrenheit?",
+        }
+    ]
+    response = completion(
+        model=model_name,
+        messages=messages,
+        tools=tools,
+        tool_choice="required",
+    )
+    print(response)
+
+
 @pytest.mark.skip(reason="this test is flaky")
 def test_completion_perplexity_api():
     try:

From 06a329a53a997a886f79320c151a8b04d1592759 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 06:57:04 -0700
Subject: [PATCH 105/150] fix +  test fireworks ai param mapping for tools

---
 litellm/llms/fireworks_ai.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/litellm/llms/fireworks_ai.py b/litellm/llms/fireworks_ai.py
index 7c2d3b72a..e9caf887a 100644
--- a/litellm/llms/fireworks_ai.py
+++ b/litellm/llms/fireworks_ai.py
@@ -99,7 +99,9 @@ class FireworksAIConfig:
                 if value == "required":
                     # relevant issue: https://github.com/BerriAI/litellm/issues/4416
                     optional_params["tool_choice"] = "any"
-
+                else:
+                    # pass through the value of tool choice
+                    optional_params["tool_choice"] = value
             elif param in supported_openai_params:
                 if value is not None:
                     optional_params[param] = value

From 612c950b15c65e6631fcfa6c73679da074a79f26 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 06:58:00 -0700
Subject: [PATCH 106/150] test - fireworks ai param mapping

---
 litellm/tests/test_fireworks_ai.py | 32 ++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 litellm/tests/test_fireworks_ai.py

diff --git a/litellm/tests/test_fireworks_ai.py b/litellm/tests/test_fireworks_ai.py
new file mode 100644
index 000000000..c7c1f5445
--- /dev/null
+++ b/litellm/tests/test_fireworks_ai.py
@@ -0,0 +1,32 @@
+import os
+import sys
+
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+from litellm.llms.fireworks_ai import FireworksAIConfig
+
+fireworks = FireworksAIConfig()
+
+
+def test_map_openai_params_tool_choice():
+    # Test case 1: tool_choice is "required"
+    result = fireworks.map_openai_params({"tool_choice": "required"}, {}, "some_model")
+    assert result == {"tool_choice": "any"}
+
+    # Test case 2: tool_choice is "auto"
+    result = fireworks.map_openai_params({"tool_choice": "auto"}, {}, "some_model")
+    assert result == {"tool_choice": "auto"}
+
+    # Test case 3: tool_choice is not present
+    result = fireworks.map_openai_params(
+        {"some_other_param": "value"}, {}, "some_model"
+    )
+    assert result == {}
+
+    # Test case 4: tool_choice is None
+    result = fireworks.map_openai_params({"tool_choice": None}, {}, "some_model")
+    assert result == {"tool_choice": None}

From 9d371f84e5a563f887bcaf4da914a11e2c79bafc Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 12:57:09 -0700
Subject: [PATCH 107/150] fix add ollama codegemma

---
 litellm/model_prices_and_context_window_backup.json | 9 +++++++++
 model_prices_and_context_window.json                | 9 +++++++++
 2 files changed, 18 insertions(+)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index d7a7a7dc8..acd03aeea 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -3369,6 +3369,15 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true
     },
+    "ollama/codegemma": {
+        "max_tokens": 8192, 
+        "max_input_tokens": 8192, 
+        "max_output_tokens": 8192, 
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "completion"
+    },
     "ollama/llama2": {
         "max_tokens": 4096, 
         "max_input_tokens": 4096, 
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index d7a7a7dc8..acd03aeea 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -3369,6 +3369,15 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true
     },
+    "ollama/codegemma": {
+        "max_tokens": 8192, 
+        "max_input_tokens": 8192, 
+        "max_output_tokens": 8192, 
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "completion"
+    },
     "ollama/llama2": {
         "max_tokens": 4096, 
         "max_input_tokens": 4096, 

From ebc22a75933e695cf5b3204d4eba3852d59c1df9 Mon Sep 17 00:00:00 2001
From: Josh Learn <josh@exponent.run>
Date: Wed, 26 Jun 2024 12:46:59 -0400
Subject: [PATCH 108/150] Add return type annotations to util types

---
 litellm/types/utils.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index f2b161128..378abf4b7 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -171,7 +171,7 @@ class Function(OpenAIObject):
         arguments: Union[Dict, str],
         name: Optional[str] = None,
         **params,
-    ):
+    ) -> None:
         if isinstance(arguments, Dict):
             arguments = json.dumps(arguments)
         else:
@@ -242,7 +242,7 @@ class ChatCompletionMessageToolCall(OpenAIObject):
         id: Optional[str] = None,
         type: Optional[str] = None,
         **params,
-    ):
+    ) -> None:
         super(ChatCompletionMessageToolCall, self).__init__(**params)
         if isinstance(function, Dict):
             self.function = Function(**function)
@@ -285,7 +285,7 @@ class Message(OpenAIObject):
         function_call=None,
         tool_calls=None,
         **params,
-    ):
+    ) -> None:
         super(Message, self).__init__(**params)
         self.content = content
         self.role = role
@@ -328,7 +328,7 @@ class Delta(OpenAIObject):
         function_call=None,
         tool_calls=None,
         **params,
-    ):
+    ) -> None:
         super(Delta, self).__init__(**params)
         self.content = content
         self.role = role
@@ -375,7 +375,7 @@ class Choices(OpenAIObject):
         logprobs=None,
         enhancements=None,
         **params,
-    ):
+    ) -> None:
         super(Choices, self).__init__(**params)
         if finish_reason is not None:
             self.finish_reason = map_finish_reason(
@@ -416,7 +416,7 @@ class Choices(OpenAIObject):
 class Usage(OpenAIObject):
     def __init__(
         self, prompt_tokens=None, completion_tokens=None, total_tokens=None, **params
-    ):
+    ) -> None:
         super(Usage, self).__init__(**params)
         if prompt_tokens:
             self.prompt_tokens = prompt_tokens
@@ -451,7 +451,7 @@ class StreamingChoices(OpenAIObject):
         logprobs=None,
         enhancements=None,
         **params,
-    ):
+    ) -> None:
         super(StreamingChoices, self).__init__(**params)
         if finish_reason:
             self.finish_reason = finish_reason
@@ -657,7 +657,7 @@ class EmbeddingResponse(OpenAIObject):
         response_ms=None,
         data=None,
         **params,
-    ):
+    ) -> None:
         object = "list"
         if response_ms:
             _response_ms = response_ms
@@ -708,7 +708,7 @@ class Logprobs(OpenAIObject):
 
 
 class TextChoices(OpenAIObject):
-    def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params):
+    def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params) -> None:
         super(TextChoices, self).__init__(**params)
         if finish_reason:
             self.finish_reason = map_finish_reason(finish_reason)
@@ -790,7 +790,7 @@ class TextCompletionResponse(OpenAIObject):
         response_ms=None,
         object=None,
         **params,
-    ):
+    ) -> None:
         if stream:
             object = "text_completion.chunk"
             choices = [TextChoices()]
@@ -873,7 +873,7 @@ class ImageObject(OpenAIObject):
     url: Optional[str] = None
     revised_prompt: Optional[str] = None
 
-    def __init__(self, b64_json=None, url=None, revised_prompt=None):
+    def __init__(self, b64_json=None, url=None, revised_prompt=None) -> None:
         super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt)
 
     def __contains__(self, key):
@@ -909,7 +909,7 @@ class ImageResponse(OpenAIObject):
 
     _hidden_params: dict = {}
 
-    def __init__(self, created=None, data=None, response_ms=None):
+    def __init__(self, created=None, data=None, response_ms=None) -> None:
         if response_ms:
             _response_ms = response_ms
         else:
@@ -956,7 +956,7 @@ class TranscriptionResponse(OpenAIObject):
 
     _hidden_params: dict = {}
 
-    def __init__(self, text=None):
+    def __init__(self, text=None) -> None:
         super().__init__(text=text)
 
     def __contains__(self, key):

From c0c715b9058f46218817ea74a08746412c1ee48f Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 14:21:57 -0700
Subject: [PATCH 109/150] fix cost tracking for whisper

---
 litellm/proxy/spend_tracking/spend_tracking_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/proxy/spend_tracking/spend_tracking_utils.py b/litellm/proxy/spend_tracking/spend_tracking_utils.py
index 54772ca9a..e4027b984 100644
--- a/litellm/proxy/spend_tracking/spend_tracking_utils.py
+++ b/litellm/proxy/spend_tracking/spend_tracking_utils.py
@@ -29,7 +29,7 @@ def get_logging_payload(
     completion_start_time = kwargs.get("completion_start_time", end_time)
     call_type = kwargs.get("call_type")
     cache_hit = kwargs.get("cache_hit", False)
-    usage = response_obj["usage"]
+    usage = response_obj.get("usage", None) or {}
     if type(usage) == litellm.Usage:
         usage = dict(usage)
     id = response_obj.get("id", kwargs.get("litellm_call_id"))

From e7b315af4c49983c093af36f068c75b413f4b50c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 15:21:49 -0700
Subject: [PATCH 110/150] test_spend_logs_payload_whisper

---
 litellm/tests/test_spend_logs.py | 87 ++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/litellm/tests/test_spend_logs.py b/litellm/tests/test_spend_logs.py
index 3e8301e1e..4cd43bb04 100644
--- a/litellm/tests/test_spend_logs.py
+++ b/litellm/tests/test_spend_logs.py
@@ -205,3 +205,90 @@ def test_spend_logs_payload():
     assert (
         payload["request_tags"] == '["model-anthropic-claude-v2.1", "app-ishaan-prod"]'
     )
+
+
+def test_spend_logs_payload_whisper():
+    """
+    Ensure we can write /transcription request/responses to spend logs
+    """
+
+    kwargs: dict = {
+        "model": "whisper-1",
+        "messages": [{"role": "user", "content": "audio_file"}],
+        "optional_params": {},
+        "litellm_params": {
+            "api_base": "",
+            "metadata": {
+                "user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
+                "user_api_key_alias": None,
+                "user_api_end_user_max_budget": None,
+                "litellm_api_version": "1.40.19",
+                "global_max_parallel_requests": None,
+                "user_api_key_user_id": "default_user_id",
+                "user_api_key_org_id": None,
+                "user_api_key_team_id": None,
+                "user_api_key_team_alias": None,
+                "user_api_key_team_max_budget": None,
+                "user_api_key_team_spend": None,
+                "user_api_key_spend": 0.0,
+                "user_api_key_max_budget": None,
+                "user_api_key_metadata": {},
+                "headers": {
+                    "host": "localhost:4000",
+                    "user-agent": "curl/7.88.1",
+                    "accept": "*/*",
+                    "content-length": "775501",
+                    "content-type": "multipart/form-data; boundary=------------------------21d518e191326d20",
+                },
+                "endpoint": "http://localhost:4000/v1/audio/transcriptions",
+                "litellm_parent_otel_span": None,
+                "model_group": "whisper-1",
+                "deployment": "whisper-1",
+                "model_info": {
+                    "id": "d7761582311451c34d83d65bc8520ce5c1537ea9ef2bec13383cf77596d49eeb",
+                    "db_model": False,
+                },
+                "caching_groups": None,
+            },
+        },
+        "start_time": datetime.datetime(2024, 6, 26, 14, 20, 11, 313291),
+        "stream": False,
+        "user": "",
+        "call_type": "atranscription",
+        "litellm_call_id": "05921cf7-33f9-421c-aad9-33310c1e2702",
+        "completion_start_time": datetime.datetime(2024, 6, 26, 14, 20, 13, 653149),
+        "stream_options": None,
+        "input": "tmp-requestc8640aee-7d85-49c3-b3ef-bdc9255d8e37.wav",
+        "original_response": '{"text": "Four score and seven years ago, our fathers brought forth on this continent a new nation, conceived in liberty and dedicated to the proposition that all men are created equal. Now we are engaged in a great civil war, testing whether that nation, or any nation so conceived and so dedicated, can long endure."}',
+        "additional_args": {
+            "complete_input_dict": {
+                "model": "whisper-1",
+                "file": "<_io.BufferedReader name='tmp-requestc8640aee-7d85-49c3-b3ef-bdc9255d8e37.wav'>",
+                "language": None,
+                "prompt": None,
+                "response_format": None,
+                "temperature": None,
+            }
+        },
+        "log_event_type": "post_api_call",
+        "end_time": datetime.datetime(2024, 6, 26, 14, 20, 13, 653149),
+        "cache_hit": None,
+        "response_cost": 0.00023398580000000003,
+    }
+
+    response = litellm.utils.TranscriptionResponse(
+        text="Four score and seven years ago, our fathers brought forth on this continent a new nation, conceived in liberty and dedicated to the proposition that all men are created equal. Now we are engaged in a great civil war, testing whether that nation, or any nation so conceived and so dedicated, can long endure."
+    )
+
+    payload: SpendLogsPayload = get_logging_payload(
+        kwargs=kwargs,
+        response_obj=response,
+        start_time=datetime.datetime.now(),
+        end_time=datetime.datetime.now(),
+        end_user_id="test-user",
+    )
+
+    print("payload: ", payload)
+
+    assert payload["call_type"] == "atranscription"
+    assert payload["spend"] == 0.00023398580000000003

From 31905a69da4939d7ee453a7f9f87abdae9448e75 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 15:59:38 -0700
Subject: [PATCH 111/150] Revert "Add return type annotations to util types"

This reverts commit faef56fe696ff3eba0fcff80c3270534b2887648.
---
 litellm/types/utils.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 378abf4b7..f2b161128 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -171,7 +171,7 @@ class Function(OpenAIObject):
         arguments: Union[Dict, str],
         name: Optional[str] = None,
         **params,
-    ) -> None:
+    ):
         if isinstance(arguments, Dict):
             arguments = json.dumps(arguments)
         else:
@@ -242,7 +242,7 @@ class ChatCompletionMessageToolCall(OpenAIObject):
         id: Optional[str] = None,
         type: Optional[str] = None,
         **params,
-    ) -> None:
+    ):
         super(ChatCompletionMessageToolCall, self).__init__(**params)
         if isinstance(function, Dict):
             self.function = Function(**function)
@@ -285,7 +285,7 @@ class Message(OpenAIObject):
         function_call=None,
         tool_calls=None,
         **params,
-    ) -> None:
+    ):
         super(Message, self).__init__(**params)
         self.content = content
         self.role = role
@@ -328,7 +328,7 @@ class Delta(OpenAIObject):
         function_call=None,
         tool_calls=None,
         **params,
-    ) -> None:
+    ):
         super(Delta, self).__init__(**params)
         self.content = content
         self.role = role
@@ -375,7 +375,7 @@ class Choices(OpenAIObject):
         logprobs=None,
         enhancements=None,
         **params,
-    ) -> None:
+    ):
         super(Choices, self).__init__(**params)
         if finish_reason is not None:
             self.finish_reason = map_finish_reason(
@@ -416,7 +416,7 @@ class Choices(OpenAIObject):
 class Usage(OpenAIObject):
     def __init__(
         self, prompt_tokens=None, completion_tokens=None, total_tokens=None, **params
-    ) -> None:
+    ):
         super(Usage, self).__init__(**params)
         if prompt_tokens:
             self.prompt_tokens = prompt_tokens
@@ -451,7 +451,7 @@ class StreamingChoices(OpenAIObject):
         logprobs=None,
         enhancements=None,
         **params,
-    ) -> None:
+    ):
         super(StreamingChoices, self).__init__(**params)
         if finish_reason:
             self.finish_reason = finish_reason
@@ -657,7 +657,7 @@ class EmbeddingResponse(OpenAIObject):
         response_ms=None,
         data=None,
         **params,
-    ) -> None:
+    ):
         object = "list"
         if response_ms:
             _response_ms = response_ms
@@ -708,7 +708,7 @@ class Logprobs(OpenAIObject):
 
 
 class TextChoices(OpenAIObject):
-    def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params) -> None:
+    def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params):
         super(TextChoices, self).__init__(**params)
         if finish_reason:
             self.finish_reason = map_finish_reason(finish_reason)
@@ -790,7 +790,7 @@ class TextCompletionResponse(OpenAIObject):
         response_ms=None,
         object=None,
         **params,
-    ) -> None:
+    ):
         if stream:
             object = "text_completion.chunk"
             choices = [TextChoices()]
@@ -873,7 +873,7 @@ class ImageObject(OpenAIObject):
     url: Optional[str] = None
     revised_prompt: Optional[str] = None
 
-    def __init__(self, b64_json=None, url=None, revised_prompt=None) -> None:
+    def __init__(self, b64_json=None, url=None, revised_prompt=None):
         super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt)
 
     def __contains__(self, key):
@@ -909,7 +909,7 @@ class ImageResponse(OpenAIObject):
 
     _hidden_params: dict = {}
 
-    def __init__(self, created=None, data=None, response_ms=None) -> None:
+    def __init__(self, created=None, data=None, response_ms=None):
         if response_ms:
             _response_ms = response_ms
         else:
@@ -956,7 +956,7 @@ class TranscriptionResponse(OpenAIObject):
 
     _hidden_params: dict = {}
 
-    def __init__(self, text=None) -> None:
+    def __init__(self, text=None):
         super().__init__(text=text)
 
     def __contains__(self, key):

From 1821b32491653569db07097f42e849161d24793d Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 16:01:50 -0700
Subject: [PATCH 112/150] fix handle_openai_chat_completion_chunk

---
 litellm/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/utils.py b/litellm/utils.py
index a33a160e4..76c93d589 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -8301,7 +8301,7 @@ class CustomStreamWrapper:
             logprobs = None
             usage = None
             original_chunk = None  # this is used for function/tool calling
-            if len(str_line.choices) > 0:
+            if str_line and str_line.choices and len(str_line.choices) > 0:
                 if (
                     str_line.choices[0].delta is not None
                     and str_line.choices[0].delta.content is not None

From d1cb4a195c825bb8886fa597bb889c819eded127 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 26 Jun 2024 16:19:05 -0700
Subject: [PATCH 113/150] fix(bedrock_httpx.py): Fix
 https://github.com/BerriAI/litellm/issues/4415

---
 litellm/llms/bedrock.py                  |  5 ++
 litellm/llms/bedrock_httpx.py            | 30 +++++-----
 litellm/tests/test_bedrock_completion.py | 74 +++++++++++++++++++++---
 3 files changed, 88 insertions(+), 21 deletions(-)

diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py
index d0d3bef6d..a8c47b3b9 100644
--- a/litellm/llms/bedrock.py
+++ b/litellm/llms/bedrock.py
@@ -1,3 +1,8 @@
+####################################
+######### DEPRECATED FILE ##########
+####################################
+# logic moved to `bedrock_httpx.py` #
+
 import copy
 import json
 import os
diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py
index 84ab10907..14abec784 100644
--- a/litellm/llms/bedrock_httpx.py
+++ b/litellm/llms/bedrock_httpx.py
@@ -261,20 +261,24 @@ class BedrockLLM(BaseLLM):
         # handle anthropic prompts and amazon titan prompts
         prompt = ""
         chat_history: Optional[list] = None
+        ## CUSTOM PROMPT
+        if model in custom_prompt_dict:
+            # check if the model has a registered custom prompt
+            model_prompt_details = custom_prompt_dict[model]
+            prompt = custom_prompt(
+                role_dict=model_prompt_details["roles"],
+                initial_prompt_value=model_prompt_details.get(
+                    "initial_prompt_value", ""
+                ),
+                final_prompt_value=model_prompt_details.get("final_prompt_value", ""),
+                messages=messages,
+            )
+            return prompt, None
+        ## ELSE
         if provider == "anthropic" or provider == "amazon":
-            if model in custom_prompt_dict:
-                # check if the model has a registered custom prompt
-                model_prompt_details = custom_prompt_dict[model]
-                prompt = custom_prompt(
-                    role_dict=model_prompt_details["roles"],
-                    initial_prompt_value=model_prompt_details["initial_prompt_value"],
-                    final_prompt_value=model_prompt_details["final_prompt_value"],
-                    messages=messages,
-                )
-            else:
-                prompt = prompt_factory(
-                    model=model, messages=messages, custom_llm_provider="bedrock"
-                )
+            prompt = prompt_factory(
+                model=model, messages=messages, custom_llm_provider="bedrock"
+            )
         elif provider == "mistral":
             prompt = prompt_factory(
                 model=model, messages=messages, custom_llm_provider="bedrock"
diff --git a/litellm/tests/test_bedrock_completion.py b/litellm/tests/test_bedrock_completion.py
index b953ca2a3..24eefceef 100644
--- a/litellm/tests/test_bedrock_completion.py
+++ b/litellm/tests/test_bedrock_completion.py
@@ -1,20 +1,31 @@
 # @pytest.mark.skip(reason="AWS Suspended Account")
-import sys, os
+import os
+import sys
 import traceback
+
 from dotenv import load_dotenv
 
 load_dotenv()
-import os, io
+import io
+import os
 
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
+from unittest.mock import AsyncMock, Mock, patch
+
 import pytest
+
 import litellm
-from litellm import embedding, completion, completion_cost, Timeout, ModelResponse
-from litellm import RateLimitError
-from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
-from unittest.mock import patch, AsyncMock, Mock
+from litellm import (
+    ModelResponse,
+    RateLimitError,
+    Timeout,
+    completion,
+    completion_cost,
+    embedding,
+)
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 
 # litellm.num_retries = 3
 litellm.cache = None
@@ -481,7 +492,10 @@ def test_completion_claude_3_base64():
 def test_provisioned_throughput():
     try:
         litellm.set_verbose = True
-        import botocore, json, io
+        import io
+        import json
+
+        import botocore
         import botocore.session
         from botocore.stub import Stubber
 
@@ -537,7 +551,6 @@ def test_completion_bedrock_mistral_completion_auth():
     # aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
     # aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
     # aws_region_name = os.environ["AWS_REGION_NAME"]
-
     # os.environ.pop("AWS_ACCESS_KEY_ID", None)
     # os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
     # os.environ.pop("AWS_REGION_NAME", None)
@@ -624,3 +637,48 @@ async def test_bedrock_extra_headers():
         assert "test" in mock_client_post.call_args.kwargs["headers"]
         assert mock_client_post.call_args.kwargs["headers"]["test"] == "hello world"
         mock_client_post.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_bedrock_custom_prompt_template():
+    """
+    Check if custom prompt template used for bedrock models
+
+    Reference: https://github.com/BerriAI/litellm/issues/4415
+    """
+    client = AsyncHTTPHandler()
+
+    with patch.object(client, "post", new=AsyncMock()) as mock_client_post:
+        import json
+
+        try:
+            response = await litellm.acompletion(
+                model="bedrock/mistral.OpenOrca",
+                messages=[{"role": "user", "content": "What's AWS?"}],
+                client=client,
+                roles={
+                    "system": {
+                        "pre_message": "<|im_start|>system\n",
+                        "post_message": "<|im_end|>",
+                    },
+                    "assistant": {
+                        "pre_message": "<|im_start|>assistant\n",
+                        "post_message": "<|im_end|>",
+                    },
+                    "user": {
+                        "pre_message": "<|im_start|>user\n",
+                        "post_message": "<|im_end|>",
+                    },
+                },
+                bos_token="<s>",
+                eos_token="<|im_end|>",
+            )
+        except Exception as e:
+            pass
+
+        print(f"mock_client_post.call_args: {mock_client_post.call_args}")
+        assert "prompt" in mock_client_post.call_args.kwargs["data"]
+
+        prompt = json.loads(mock_client_post.call_args.kwargs["data"])["prompt"]
+        assert prompt == "<|im_start|>user\nWhat's AWS?<|im_end|>"
+        mock_client_post.assert_called_once()

From 9fb0a9cd0a08de7b10e6959e23665084c2a244cc Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 16:16:58 -0700
Subject: [PATCH 114/150] fix - reuse client initialized on proxy config

---
 litellm/llms/azure.py  |  3 ++-
 litellm/llms/openai.py | 18 ++++++++++++++----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index b763a7c95..5d73b9435 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -812,7 +812,7 @@ class AzureChatCompletion(BaseLLM):
         azure_client_params: dict,
         api_key: str,
         input: list,
-        client=None,
+        client: Optional[AsyncAzureOpenAI] = None,
         logging_obj=None,
         timeout=None,
     ):
@@ -911,6 +911,7 @@ class AzureChatCompletion(BaseLLM):
                     model_response=model_response,
                     azure_client_params=azure_client_params,
                     timeout=timeout,
+                    client=client,
                 )
                 return response
             if client is None:
diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index 55a0d97da..7d14fa450 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -996,11 +996,11 @@ class OpenAIChatCompletion(BaseLLM):
         self,
         input: list,
         data: dict,
-        model_response: ModelResponse,
+        model_response: litellm.utils.EmbeddingResponse,
         timeout: float,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
-        client=None,
+        client: Optional[AsyncOpenAI] = None,
         max_retries=None,
         logging_obj=None,
     ):
@@ -1039,9 +1039,9 @@ class OpenAIChatCompletion(BaseLLM):
         input: list,
         timeout: float,
         logging_obj,
+        model_response: litellm.utils.EmbeddingResponse,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
-        model_response: Optional[litellm.utils.EmbeddingResponse] = None,
         optional_params=None,
         client=None,
         aembedding=None,
@@ -1062,7 +1062,17 @@ class OpenAIChatCompletion(BaseLLM):
             )
 
             if aembedding is True:
-                response = self.aembedding(data=data, input=input, logging_obj=logging_obj, model_response=model_response, api_base=api_base, api_key=api_key, timeout=timeout, client=client, max_retries=max_retries)  # type: ignore
+                response = self.aembedding(
+                    data=data,
+                    input=input,
+                    logging_obj=logging_obj,
+                    model_response=model_response,
+                    api_base=api_base,
+                    api_key=api_key,
+                    timeout=timeout,
+                    client=client,
+                    max_retries=max_retries,
+                )
                 return response
 
             openai_client = self._get_openai_client(

From 348a8cffc26bfcbf917bffd058a10e915683bf43 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 16:47:23 -0700
Subject: [PATCH 115/150] add volcengine as provider to litellm

---
 litellm/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/litellm/__init__.py b/litellm/__init__.py
index cee80a32d..f4bc95066 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -413,6 +413,7 @@ openai_compatible_providers: List = [
     "mistral",
     "groq",
     "nvidia_nim",
+    "volcengine",
     "codestral",
     "deepseek",
     "deepinfra",
@@ -643,6 +644,7 @@ provider_list: List = [
     "mistral",
     "groq",
     "nvidia_nim",
+    "volcengine",
     "codestral",
     "text-completion-codestral",
     "deepseek",

From 9d49747372abfc0aa0078764faac5e7490313a79 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 16:53:44 -0700
Subject: [PATCH 116/150] add initial support for volcengine

---
 litellm/__init__.py        |  1 +
 litellm/llms/volcengine.py | 87 ++++++++++++++++++++++++++++++++++++++
 litellm/main.py            |  4 ++
 litellm/utils.py           | 23 ++++++++++
 4 files changed, 115 insertions(+)
 create mode 100644 litellm/llms/volcengine.py

diff --git a/litellm/__init__.py b/litellm/__init__.py
index f4bc95066..f1cc32cd1 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -820,6 +820,7 @@ from .llms.openai import (
 )
 from .llms.nvidia_nim import NvidiaNimConfig
 from .llms.fireworks_ai import FireworksAIConfig
+from .llms.volcengine import VolcEngineConfig
 from .llms.text_completion_codestral import MistralTextCompletionConfig
 from .llms.azure import (
     AzureOpenAIConfig,
diff --git a/litellm/llms/volcengine.py b/litellm/llms/volcengine.py
new file mode 100644
index 000000000..eb289d1c4
--- /dev/null
+++ b/litellm/llms/volcengine.py
@@ -0,0 +1,87 @@
+import types
+from typing import Literal, Optional, Union
+
+import litellm
+
+
+class VolcEngineConfig:
+    frequency_penalty: Optional[int] = None
+    function_call: Optional[Union[str, dict]] = None
+    functions: Optional[list] = None
+    logit_bias: Optional[dict] = None
+    max_tokens: Optional[int] = None
+    n: Optional[int] = None
+    presence_penalty: Optional[int] = None
+    stop: Optional[Union[str, list]] = None
+    temperature: Optional[int] = None
+    top_p: Optional[int] = None
+    response_format: Optional[dict] = None
+
+    def __init__(
+        self,
+        frequency_penalty: Optional[int] = None,
+        function_call: Optional[Union[str, dict]] = None,
+        functions: Optional[list] = None,
+        logit_bias: Optional[dict] = None,
+        max_tokens: Optional[int] = None,
+        n: Optional[int] = None,
+        presence_penalty: Optional[int] = None,
+        stop: Optional[Union[str, list]] = None,
+        temperature: Optional[int] = None,
+        top_p: Optional[int] = None,
+        response_format: Optional[dict] = None,
+    ) -> None:
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
+
+    def get_supported_openai_params(self, model: str) -> list:
+        return [
+            "frequency_penalty",
+            "logit_bias",
+            "logprobs",
+            "top_logprobs",
+            "max_tokens",
+            "n",
+            "presence_penalty",
+            "seed",
+            "stop",
+            "stream",
+            "stream_options",
+            "temperature",
+            "top_p",
+            "tools",
+            "tool_choice",
+            "function_call",
+            "functions",
+            "max_retries",
+            "extra_headers",
+        ]  # works across all models
+
+    def map_openai_params(
+        self, non_default_params: dict, optional_params: dict, model: str
+    ) -> dict:
+        supported_openai_params = self.get_supported_openai_params(model)
+        for param, value in non_default_params.items():
+            if param in supported_openai_params:
+                optional_params[param] = value
+        return optional_params
diff --git a/litellm/main.py b/litellm/main.py
index b7aa47ab7..649581936 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -349,6 +349,7 @@ async def acompletion(
             or custom_llm_provider == "perplexity"
             or custom_llm_provider == "groq"
             or custom_llm_provider == "nvidia_nim"
+            or custom_llm_provider == "volcengine"
             or custom_llm_provider == "codestral"
             or custom_llm_provider == "text-completion-codestral"
             or custom_llm_provider == "deepseek"
@@ -1192,6 +1193,7 @@ def completion(
             or custom_llm_provider == "perplexity"
             or custom_llm_provider == "groq"
             or custom_llm_provider == "nvidia_nim"
+            or custom_llm_provider == "volcengine"
             or custom_llm_provider == "codestral"
             or custom_llm_provider == "deepseek"
             or custom_llm_provider == "anyscale"
@@ -2954,6 +2956,7 @@ async def aembedding(*args, **kwargs) -> EmbeddingResponse:
             or custom_llm_provider == "perplexity"
             or custom_llm_provider == "groq"
             or custom_llm_provider == "nvidia_nim"
+            or custom_llm_provider == "volcengine"
             or custom_llm_provider == "deepseek"
             or custom_llm_provider == "fireworks_ai"
             or custom_llm_provider == "ollama"
@@ -3533,6 +3536,7 @@ async def atext_completion(
             or custom_llm_provider == "perplexity"
             or custom_llm_provider == "groq"
             or custom_llm_provider == "nvidia_nim"
+            or custom_llm_provider == "volcengine"
             or custom_llm_provider == "text-completion-codestral"
             or custom_llm_provider == "deepseek"
             or custom_llm_provider == "fireworks_ai"
diff --git a/litellm/utils.py b/litellm/utils.py
index 76c93d589..42e8cba30 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2413,6 +2413,7 @@ def get_optional_params(
             and custom_llm_provider != "together_ai"
             and custom_llm_provider != "groq"
             and custom_llm_provider != "nvidia_nim"
+            and custom_llm_provider != "volcengine"
             and custom_llm_provider != "deepseek"
             and custom_llm_provider != "codestral"
             and custom_llm_provider != "mistral"
@@ -3089,6 +3090,17 @@ def get_optional_params(
             optional_params=optional_params,
             model=model,
         )
+    elif custom_llm_provider == "volcengine":
+        supported_params = get_supported_openai_params(
+            model=model, custom_llm_provider=custom_llm_provider
+        )
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = litellm.VolcEngineConfig().map_openai_params(
+            non_default_params=non_default_params,
+            optional_params=optional_params,
+            model=model,
+        )
+
     elif custom_llm_provider == "groq":
         supported_params = get_supported_openai_params(
             model=model, custom_llm_provider=custom_llm_provider
@@ -3659,6 +3671,8 @@ def get_supported_openai_params(
         return litellm.FireworksAIConfig().get_supported_openai_params()
     elif custom_llm_provider == "nvidia_nim":
         return litellm.NvidiaNimConfig().get_supported_openai_params()
+    elif custom_llm_provider == "volcengine":
+        return litellm.VolcEngineConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "groq":
         return [
             "temperature",
@@ -4023,6 +4037,10 @@ def get_llm_provider(
                 # nvidia_nim is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1
                 api_base = "https://integrate.api.nvidia.com/v1"
                 dynamic_api_key = get_secret("NVIDIA_NIM_API_KEY")
+            elif custom_llm_provider == "volcengine":
+                # volcengine is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1
+                api_base = "https://ark.cn-beijing.volces.com/api/v3"
+                dynamic_api_key = get_secret("VOLCENGINE_API_KEY")
             elif custom_llm_provider == "codestral":
                 # codestral is openai compatible, we just need to set this to custom_openai and have the api_base be https://codestral.mistral.ai/v1
                 api_base = "https://codestral.mistral.ai/v1"
@@ -4945,6 +4963,11 @@ def validate_environment(model: Optional[str] = None) -> dict:
                 keys_in_environment = True
             else:
                 missing_keys.append("NVIDIA_NIM_API_KEY")
+        elif custom_llm_provider == "volcengine":
+            if "VOLCENGINE_API_KEY" in os.environ:
+                keys_in_environment = True
+            else:
+                missing_keys.append("VOLCENGINE_API_KEY")
         elif (
             custom_llm_provider == "codestral"
             or custom_llm_provider == "text-completion-codestral"

From 124b80fc7351864053ea6d3ec283815e78e21d24 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 17:04:19 -0700
Subject: [PATCH 117/150] docs - volcengine

---
 docs/my-website/docs/providers/volcano.md | 98 +++++++++++++++++++++++
 docs/my-website/sidebars.js               |  1 +
 2 files changed, 99 insertions(+)
 create mode 100644 docs/my-website/docs/providers/volcano.md

diff --git a/docs/my-website/docs/providers/volcano.md b/docs/my-website/docs/providers/volcano.md
new file mode 100644
index 000000000..1742a43d8
--- /dev/null
+++ b/docs/my-website/docs/providers/volcano.md
@@ -0,0 +1,98 @@
+# Volcano Engine (Volcengine)
+https://www.volcengine.com/docs/82379/1263482
+
+:::tip
+
+**We support ALL Volcengine NIM models, just set `model=volcengine/<any-model-on-volcengine>` as a prefix when sending litellm requests**
+
+:::
+
+## API Key
+```python
+# env variable
+os.environ['VOLCENGINE_API_KEY']
+```
+
+## Sample Usage
+```python
+from litellm import completion
+import os
+
+os.environ['VOLCENGINE_API_KEY'] = ""
+response = completion(
+    model="volcengine/<OUR_ENDPOINT_ID>",
+    messages=[
+        {
+            "role": "user",
+            "content": "What's the weather like in Boston today in Fahrenheit?",
+        }
+    ],
+    temperature=0.2,        # optional
+    top_p=0.9,              # optional
+    frequency_penalty=0.1,  # optional
+    presence_penalty=0.1,   # optional
+    max_tokens=10,          # optional
+    stop=["\n\n"],          # optional
+)
+print(response)
+```
+
+## Sample Usage - Streaming
+```python
+from litellm import completion
+import os
+
+os.environ['VOLCENGINE_API_KEY'] = ""
+response = completion(
+    model="volcengine/<OUR_ENDPOINT_ID>",
+    messages=[
+        {
+            "role": "user",
+            "content": "What's the weather like in Boston today in Fahrenheit?",
+        }
+    ],
+    stream=True,
+    temperature=0.2,        # optional
+    top_p=0.9,              # optional
+    frequency_penalty=0.1,  # optional
+    presence_penalty=0.1,   # optional
+    max_tokens=10,          # optional
+    stop=["\n\n"],          # optional
+)
+
+for chunk in response:
+    print(chunk)
+```
+
+
+## Supported Models - 💥 ALL Volcengine NIM Models Supported!
+We support ALL `volcengine` models, just set `volcengine/<OUR_ENDPOINT_ID>` as a prefix when sending completion requests
+
+## Sample Usage - LiteLLM Proxy
+
+### Config.yaml setting
+
+```yaml
+model_list:
+  - model_name: volcengine-model
+    litellm_params:
+      model: volcengine/<OUR_ENDPOINT_ID>
+      api_key: os.environ/VOLCENGINE_API_KEY
+```
+
+### Send Request
+
+```shell
+curl --location 'http://localhost:4000/chat/completions' \
+    --header 'Authorization: Bearer sk-1234' \
+    --header 'Content-Type: application/json' \
+    --data '{
+    "model": "volcengine-model",
+    "messages": [
+        {
+        "role": "user",
+        "content": "here is my api key. openai_api_key=sk-1234"
+        }
+    ]
+}'
+```
\ No newline at end of file
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 9835a260b..31bc6abcb 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -147,6 +147,7 @@ const sidebars = {
         "providers/watsonx",
         "providers/predibase",
         "providers/nvidia_nim", 
+        "providers/volcano", 
         "providers/triton-inference-server",
         "providers/ollama", 
         "providers/perplexity", 

From 734d79ce8052b43a660b5c85d7a2137365372a65 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 17:09:30 -0700
Subject: [PATCH 118/150] test volcengine

---
 litellm/tests/test_completion.py | 62 +++++++++++++-------------------
 1 file changed, 24 insertions(+), 38 deletions(-)

diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index a3b0e6ea2..2ceb11a79 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -1222,44 +1222,6 @@ def test_completion_fireworks_ai():
         pytest.fail(f"Error occurred: {e}")
 
 
-def test_fireworks_ai_tool_calling():
-    litellm.set_verbose = True
-    model_name = "fireworks_ai/accounts/fireworks/models/firefunction-v2"
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "get_current_weather",
-                "description": "Get the current weather in a given location",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "location": {
-                            "type": "string",
-                            "description": "The city and state, e.g. San Francisco, CA",
-                        },
-                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
-                    },
-                    "required": ["location"],
-                },
-            },
-        }
-    ]
-    messages = [
-        {
-            "role": "user",
-            "content": "What's the weather like in Boston today in Fahrenheit?",
-        }
-    ]
-    response = completion(
-        model=model_name,
-        messages=messages,
-        tools=tools,
-        tool_choice="required",
-    )
-    print(response)
-
-
 @pytest.mark.skip(reason="this test is flaky")
 def test_completion_perplexity_api():
     try:
@@ -3508,6 +3470,30 @@ def test_completion_deep_infra_mistral():
 # test_completion_deep_infra_mistral()
 
 
+@pytest.mark.skip(reason="Local test - don't have a volcengine account as yet")
+def test_completion_volcengine():
+    litellm.set_verbose = True
+    model_name = "volcengine/<OUR_ENDPOINT_ID>"
+    try:
+        response = completion(
+            model=model_name,
+            messages=[
+                {
+                    "role": "user",
+                    "content": "What's the weather like in Boston today in Fahrenheit?",
+                }
+            ],
+            api_key="<OUR_API_KEY>",
+        )
+        # Add any assertions here to check the response
+        print(response)
+
+    except litellm.exceptions.Timeout as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 def test_completion_nvidia_nim():
     model_name = "nvidia_nim/databricks/dbrx-instruct"
     try:

From b9bc16590d927b3d6ef6631c7d29f1c1082bd905 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 12:31:28 -0700
Subject: [PATCH 119/150] forward otel traceparent in request headers

---
 litellm/proxy/litellm_pre_call_utils.py | 18 ++++++++++++++++++
 litellm/utils.py                        |  2 ++
 2 files changed, 20 insertions(+)

diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index 2e670de85..963cdf027 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -144,10 +144,13 @@ async def add_litellm_data_to_request(
     )  # do not store the original `sk-..` api key in the db
     data[_metadata_variable_name]["headers"] = _headers
     data[_metadata_variable_name]["endpoint"] = str(request.url)
+
+    # OTEL Controls / Tracing
     # Add the OTEL Parent Trace before sending it LiteLLM
     data[_metadata_variable_name][
         "litellm_parent_otel_span"
     ] = user_api_key_dict.parent_otel_span
+    _add_otel_traceparent_to_data(data, request=request)
 
     ### END-USER SPECIFIC PARAMS ###
     if user_api_key_dict.allowed_model_region is not None:
@@ -169,3 +172,18 @@ async def add_litellm_data_to_request(
             }  # add the team-specific configs to the completion call
 
     return data
+
+
+def _add_otel_traceparent_to_data(data: dict, request: Request):
+    if data is None:
+        return
+    if request.headers:
+        if "traceparent" in request.headers:
+            # we want to forward this to the LLM Provider
+            # Relevant issue: https://github.com/BerriAI/litellm/issues/4419
+            # pass this in extra_headers
+            if "extra_headers" not in data:
+                data["extra_headers"] = {}
+            _exra_headers = data["extra_headers"]
+            if "traceparent" not in _exra_headers:
+                _exra_headers["traceparent"] = request.headers["traceparent"]
diff --git a/litellm/utils.py b/litellm/utils.py
index 42e8cba30..515918822 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3684,6 +3684,8 @@ def get_supported_openai_params(
             "tool_choice",
             "response_format",
             "seed",
+            "extra_headers",
+            "extra_body",
         ]
     elif custom_llm_provider == "deepseek":
         return [

From df978481956983cbef58dad5e19f7711e2402dcd Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 17:28:29 -0700
Subject: [PATCH 120/150] add codestral pricing

---
 ...odel_prices_and_context_window_backup.json | 36 +++++++++++++++++++
 model_prices_and_context_window.json          | 36 +++++++++++++++++++
 2 files changed, 72 insertions(+)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index acd03aeea..1954cb57b 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -863,6 +863,42 @@
         "litellm_provider": "deepseek",
         "mode": "chat"
     },
+    "codestral/codestral-latest": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000000,
+        "output_cost_per_token": 0.000000,
+        "litellm_provider": "codestral",
+        "mode": "chat"
+    },
+    "codestral/codestral-2405": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000000,
+        "output_cost_per_token": 0.000000,
+        "litellm_provider": "codestral",
+        "mode": "chat"
+    },
+    "text-completion-codestral/codestral-latest": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000000,
+        "output_cost_per_token": 0.000000,
+        "litellm_provider": "text-completion-codestral",
+        "mode": "completion"
+    },
+    "text-completion-codestral/codestral-2405": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000000,
+        "output_cost_per_token": 0.000000,
+        "litellm_provider": "text-completion-codestral",
+        "mode": "completion"
+    },
     "deepseek-coder": {
         "max_tokens": 4096,
         "max_input_tokens": 32000,
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index acd03aeea..1954cb57b 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -863,6 +863,42 @@
         "litellm_provider": "deepseek",
         "mode": "chat"
     },
+    "codestral/codestral-latest": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000000,
+        "output_cost_per_token": 0.000000,
+        "litellm_provider": "codestral",
+        "mode": "chat"
+    },
+    "codestral/codestral-2405": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000000,
+        "output_cost_per_token": 0.000000,
+        "litellm_provider": "codestral",
+        "mode": "chat"
+    },
+    "text-completion-codestral/codestral-latest": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000000,
+        "output_cost_per_token": 0.000000,
+        "litellm_provider": "text-completion-codestral",
+        "mode": "completion"
+    },
+    "text-completion-codestral/codestral-2405": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000000,
+        "output_cost_per_token": 0.000000,
+        "litellm_provider": "text-completion-codestral",
+        "mode": "completion"
+    },
     "deepseek-coder": {
         "max_tokens": 4096,
         "max_input_tokens": 32000,

From d499c4c76724ac66828a1da7a8893f2cd6fec709 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 17:31:26 -0700
Subject: [PATCH 121/150] add source for codestral pricing

---
 litellm/model_prices_and_context_window_backup.json | 12 ++++++++----
 model_prices_and_context_window.json                | 12 ++++++++----
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 1954cb57b..6b15084a9 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -870,7 +870,8 @@
         "input_cost_per_token": 0.000000,
         "output_cost_per_token": 0.000000,
         "litellm_provider": "codestral",
-        "mode": "chat"
+        "mode": "chat",
+        "source": "https://docs.mistral.ai/capabilities/code_generation/"
     },
     "codestral/codestral-2405": {
         "max_tokens": 8191,
@@ -879,7 +880,8 @@
         "input_cost_per_token": 0.000000,
         "output_cost_per_token": 0.000000,
         "litellm_provider": "codestral",
-        "mode": "chat"
+        "mode": "chat",
+        "source": "https://docs.mistral.ai/capabilities/code_generation/"
     },
     "text-completion-codestral/codestral-latest": {
         "max_tokens": 8191,
@@ -888,7 +890,8 @@
         "input_cost_per_token": 0.000000,
         "output_cost_per_token": 0.000000,
         "litellm_provider": "text-completion-codestral",
-        "mode": "completion"
+        "mode": "completion",
+        "source": "https://docs.mistral.ai/capabilities/code_generation/"
     },
     "text-completion-codestral/codestral-2405": {
         "max_tokens": 8191,
@@ -897,7 +900,8 @@
         "input_cost_per_token": 0.000000,
         "output_cost_per_token": 0.000000,
         "litellm_provider": "text-completion-codestral",
-        "mode": "completion"
+        "mode": "completion",
+        "source": "https://docs.mistral.ai/capabilities/code_generation/"
     },
     "deepseek-coder": {
         "max_tokens": 4096,
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 1954cb57b..6b15084a9 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -870,7 +870,8 @@
         "input_cost_per_token": 0.000000,
         "output_cost_per_token": 0.000000,
         "litellm_provider": "codestral",
-        "mode": "chat"
+        "mode": "chat",
+        "source": "https://docs.mistral.ai/capabilities/code_generation/"
     },
     "codestral/codestral-2405": {
         "max_tokens": 8191,
@@ -879,7 +880,8 @@
         "input_cost_per_token": 0.000000,
         "output_cost_per_token": 0.000000,
         "litellm_provider": "codestral",
-        "mode": "chat"
+        "mode": "chat",
+        "source": "https://docs.mistral.ai/capabilities/code_generation/"
     },
     "text-completion-codestral/codestral-latest": {
         "max_tokens": 8191,
@@ -888,7 +890,8 @@
         "input_cost_per_token": 0.000000,
         "output_cost_per_token": 0.000000,
         "litellm_provider": "text-completion-codestral",
-        "mode": "completion"
+        "mode": "completion",
+        "source": "https://docs.mistral.ai/capabilities/code_generation/"
     },
     "text-completion-codestral/codestral-2405": {
         "max_tokens": 8191,
@@ -897,7 +900,8 @@
         "input_cost_per_token": 0.000000,
         "output_cost_per_token": 0.000000,
         "litellm_provider": "text-completion-codestral",
-        "mode": "completion"
+        "mode": "completion",
+        "source": "https://docs.mistral.ai/capabilities/code_generation/"
     },
     "deepseek-coder": {
         "max_tokens": 4096,

From 80e520c8f0db8dfbcd069866313d92f18e93751a Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 08:46:45 -0700
Subject: [PATCH 122/150] add gemini-1.0-ultra-001

---
 ...odel_prices_and_context_window_backup.json | 30 +++++++++++++++++++
 model_prices_and_context_window.json          | 30 +++++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 6b15084a9..4e54a4d78 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -1272,6 +1272,36 @@
         "supports_function_calling": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "gemini-1.0-ultra": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32760,
+        "max_output_tokens": 8192,
+        "input_cost_per_image": 0.0025,
+        "input_cost_per_video_per_second": 0.002,
+        "input_cost_per_token": 0.0000005, 
+        "input_cost_per_character": 0.000000125, 
+        "output_cost_per_token": 0.0000015,
+        "output_cost_per_character": 0.000000375,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro information here"
+    },
+    "gemini-1.0-ultra-001": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32760,
+        "max_output_tokens": 8192,
+        "input_cost_per_image": 0.0025,
+        "input_cost_per_video_per_second": 0.002,
+        "input_cost_per_token": 0.0000005, 
+        "input_cost_per_character": 0.000000125, 
+        "output_cost_per_token": 0.0000015,
+        "output_cost_per_character": 0.000000375,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro information here"
+    },
     "gemini-1.0-pro-002": { 
         "max_tokens": 8192,
         "max_input_tokens": 32760,
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 6b15084a9..4e54a4d78 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -1272,6 +1272,36 @@
         "supports_function_calling": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "gemini-1.0-ultra": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32760,
+        "max_output_tokens": 8192,
+        "input_cost_per_image": 0.0025,
+        "input_cost_per_video_per_second": 0.002,
+        "input_cost_per_token": 0.0000005, 
+        "input_cost_per_character": 0.000000125, 
+        "output_cost_per_token": 0.0000015,
+        "output_cost_per_character": 0.000000375,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro information here"
+    },
+    "gemini-1.0-ultra-001": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32760,
+        "max_output_tokens": 8192,
+        "input_cost_per_image": 0.0025,
+        "input_cost_per_video_per_second": 0.002,
+        "input_cost_per_token": 0.0000005, 
+        "input_cost_per_character": 0.000000125, 
+        "output_cost_per_token": 0.0000015,
+        "output_cost_per_character": 0.000000375,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro information here"
+    },
     "gemini-1.0-pro-002": { 
         "max_tokens": 8192,
         "max_input_tokens": 32760,

From 0afe0a5466d640dffddbf1eb912005ce186e47e6 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 08:55:04 -0700
Subject: [PATCH 123/150] fix gemini ultra info

---
 litellm/model_prices_and_context_window_backup.json | 12 ++++++------
 model_prices_and_context_window.json                | 12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 4e54a4d78..c829e6a53 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -1274,8 +1274,8 @@
     },
     "gemini-1.0-ultra": {
         "max_tokens": 8192,
-        "max_input_tokens": 32760,
-        "max_output_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 2048,
         "input_cost_per_image": 0.0025,
         "input_cost_per_video_per_second": 0.002,
         "input_cost_per_token": 0.0000005, 
@@ -1285,12 +1285,12 @@
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_function_calling": true,
-        "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro information here"
+        "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
     "gemini-1.0-ultra-001": {
         "max_tokens": 8192,
-        "max_input_tokens": 32760,
-        "max_output_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 2048,
         "input_cost_per_image": 0.0025,
         "input_cost_per_video_per_second": 0.002,
         "input_cost_per_token": 0.0000005, 
@@ -1300,7 +1300,7 @@
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_function_calling": true,
-        "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro information here"
+        "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
     "gemini-1.0-pro-002": { 
         "max_tokens": 8192,
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 4e54a4d78..c829e6a53 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -1274,8 +1274,8 @@
     },
     "gemini-1.0-ultra": {
         "max_tokens": 8192,
-        "max_input_tokens": 32760,
-        "max_output_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 2048,
         "input_cost_per_image": 0.0025,
         "input_cost_per_video_per_second": 0.002,
         "input_cost_per_token": 0.0000005, 
@@ -1285,12 +1285,12 @@
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_function_calling": true,
-        "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro information here"
+        "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
     "gemini-1.0-ultra-001": {
         "max_tokens": 8192,
-        "max_input_tokens": 32760,
-        "max_output_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 2048,
         "input_cost_per_image": 0.0025,
         "input_cost_per_video_per_second": 0.002,
         "input_cost_per_token": 0.0000005, 
@@ -1300,7 +1300,7 @@
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_function_calling": true,
-        "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro information here"
+        "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
     "gemini-1.0-pro-002": { 
         "max_tokens": 8192,

From 9441f756562f73d2860c6e872da20926b51b0330 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 11:18:22 -0700
Subject: [PATCH 124/150] add vertex text-bison

---
 ...odel_prices_and_context_window_backup.json | 42 +++++++++++++++++--
 model_prices_and_context_window.json          | 42 +++++++++++++++++--
 2 files changed, 76 insertions(+), 8 deletions(-)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index c829e6a53..f9453bc0f 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -1068,21 +1068,55 @@
         "tool_use_system_prompt_tokens": 159
     },
     "text-bison": {
-        "max_tokens": 1024,
+        "max_tokens": 2048,
         "max_input_tokens": 8192,
-        "max_output_tokens": 1024,
-        "input_cost_per_token": 0.000000125,
-        "output_cost_per_token": 0.000000125,
+        "max_output_tokens": 2048,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-text-models",
         "mode": "completion",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
     "text-bison@001": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "text-bison@002": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "text-bison32k": {
         "max_tokens": 1024,
         "max_input_tokens": 8192,
         "max_output_tokens": 1024,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "text-bison32k@002": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-text-models",
         "mode": "completion",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index c829e6a53..f9453bc0f 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -1068,21 +1068,55 @@
         "tool_use_system_prompt_tokens": 159
     },
     "text-bison": {
-        "max_tokens": 1024,
+        "max_tokens": 2048,
         "max_input_tokens": 8192,
-        "max_output_tokens": 1024,
-        "input_cost_per_token": 0.000000125,
-        "output_cost_per_token": 0.000000125,
+        "max_output_tokens": 2048,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-text-models",
         "mode": "completion",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
     "text-bison@001": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "text-bison@002": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "text-bison32k": {
         "max_tokens": 1024,
         "max_input_tokens": 8192,
         "max_output_tokens": 1024,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "text-bison32k@002": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-text-models",
         "mode": "completion",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"

From 69b70121efff8d6fe46ffe56f5ae80c48577555a Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 11:26:14 -0700
Subject: [PATCH 125/150] add chat-bison-32k@002

---
 ...odel_prices_and_context_window_backup.json | 30 +++++++++++++++++++
 model_prices_and_context_window.json          | 30 +++++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index f9453bc0f..20f5ecec9 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -1147,6 +1147,8 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-chat-models",
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1157,6 +1159,8 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-chat-models",
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1167,6 +1171,8 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-chat-models",
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1177,6 +1183,20 @@
         "max_output_tokens": 8192,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "chat-bison-32k@002": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-chat-models",
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1187,6 +1207,8 @@
         "max_output_tokens": 1024,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-code-text-models",
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1197,6 +1219,8 @@
         "max_output_tokens": 1024,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-code-text-models",
         "mode": "completion",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1237,6 +1261,8 @@
         "max_output_tokens": 1024,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-code-chat-models",
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1247,6 +1273,8 @@
         "max_output_tokens": 1024,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-code-chat-models",
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1257,6 +1285,8 @@
         "max_output_tokens": 8192,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-code-chat-models",
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index f9453bc0f..20f5ecec9 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -1147,6 +1147,8 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-chat-models",
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1157,6 +1159,8 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-chat-models",
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1167,6 +1171,8 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-chat-models",
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1177,6 +1183,20 @@
         "max_output_tokens": 8192,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "chat-bison-32k@002": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-chat-models",
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1187,6 +1207,8 @@
         "max_output_tokens": 1024,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-code-text-models",
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1197,6 +1219,8 @@
         "max_output_tokens": 1024,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-code-text-models",
         "mode": "completion",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1237,6 +1261,8 @@
         "max_output_tokens": 1024,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-code-chat-models",
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1247,6 +1273,8 @@
         "max_output_tokens": 1024,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-code-chat-models",
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1257,6 +1285,8 @@
         "max_output_tokens": 8192,
         "input_cost_per_token": 0.000000125,
         "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
         "litellm_provider": "vertex_ai-code-chat-models",
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"

From d30ce40768e6d2cd742ce1b02c56234caec88d01 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 11:28:10 -0700
Subject: [PATCH 126/150] add code-bison

---
 ...odel_prices_and_context_window_backup.json | 36 +++++++++++++++++++
 model_prices_and_context_window.json          | 36 +++++++++++++++++++
 2 files changed, 72 insertions(+)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 20f5ecec9..39e8a4caf 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -1225,6 +1225,42 @@
         "mode": "completion",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "code-bison@002": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "code-bison32k": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "code-bison-32k@002": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "code-gecko@001": {
         "max_tokens": 64,
         "max_input_tokens": 2048,
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 20f5ecec9..39e8a4caf 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -1225,6 +1225,42 @@
         "mode": "completion",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "code-bison@002": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "code-bison32k": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "code-bison-32k@002": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "code-gecko@001": {
         "max_tokens": 64,
         "max_input_tokens": 2048,

From c9a27f9d9e2c840efaa99566506a4829aa55b522 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 11:34:48 -0700
Subject: [PATCH 127/150] add code-gecko-latest

---
 litellm/model_prices_and_context_window_backup.json | 10 ++++++++++
 model_prices_and_context_window.json                | 10 ++++++++++
 2 files changed, 20 insertions(+)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 39e8a4caf..1838c53b2 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -1291,6 +1291,16 @@
         "mode": "completion",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "code-gecko-latest": {
+        "max_tokens": 64,
+        "max_input_tokens": 2048,
+        "max_output_tokens": 64,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "codechat-bison": {
         "max_tokens": 1024,
         "max_input_tokens": 6144,
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 39e8a4caf..1838c53b2 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -1291,6 +1291,16 @@
         "mode": "completion",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "code-gecko-latest": {
+        "max_tokens": 64,
+        "max_input_tokens": 2048,
+        "max_output_tokens": 64,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "litellm_provider": "vertex_ai-code-text-models",
+        "mode": "completion",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "codechat-bison": {
         "max_tokens": 1024,
         "max_input_tokens": 6144,

From 76490690a15af8c3b53a03878d7f268d0bf225af Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 11:37:39 -0700
Subject: [PATCH 128/150] add codechat-bison@latest

---
 ...odel_prices_and_context_window_backup.json | 36 +++++++++++++++++++
 model_prices_and_context_window.json          | 36 +++++++++++++++++++
 2 files changed, 72 insertions(+)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 1838c53b2..415041dcb 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -1301,6 +1301,18 @@
         "mode": "completion",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "codechat-bison@latest": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-code-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "codechat-bison": {
         "max_tokens": 1024,
         "max_input_tokens": 6144,
@@ -1325,6 +1337,18 @@
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "codechat-bison@002": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-code-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "codechat-bison-32k": {
         "max_tokens": 8192,
         "max_input_tokens": 32000,
@@ -1337,6 +1361,18 @@
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "codechat-bison-32k@002": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-code-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "gemini-pro": {
         "max_tokens": 8192,
         "max_input_tokens": 32760,
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 1838c53b2..415041dcb 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -1301,6 +1301,18 @@
         "mode": "completion",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "codechat-bison@latest": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-code-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "codechat-bison": {
         "max_tokens": 1024,
         "max_input_tokens": 6144,
@@ -1325,6 +1337,18 @@
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "codechat-bison@002": {
+        "max_tokens": 1024,
+        "max_input_tokens": 6144,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-code-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "codechat-bison-32k": {
         "max_tokens": 8192,
         "max_input_tokens": 32000,
@@ -1337,6 +1361,18 @@
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "codechat-bison-32k@002": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.000000125,
+        "input_cost_per_character": 0.00000025,
+        "output_cost_per_character": 0.0000005,
+        "litellm_provider": "vertex_ai-code-chat-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "gemini-pro": {
         "max_tokens": 8192,
         "max_input_tokens": 32760,

From c27bfd76b6aa02beebe3661c739eeea405251b14 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 18:08:54 -0700
Subject: [PATCH 129/150] vertex testing

---
 .../tests/test_amazing_vertex_completion.py    | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py
index c9e5501a8..901d68ef3 100644
--- a/litellm/tests/test_amazing_vertex_completion.py
+++ b/litellm/tests/test_amazing_vertex_completion.py
@@ -329,11 +329,14 @@ def test_vertex_ai():
                 "code-gecko@001",
                 "code-gecko@002",
                 "code-gecko@latest",
+                "codechat-bison@latest",
                 "code-bison@001",
                 "text-bison@001",
                 "gemini-1.5-pro",
                 "gemini-1.5-pro-preview-0215",
-            ]:
+            ] or (
+                "gecko" in model or "32k" in model or "ultra" in model or "002" in model
+            ):
                 # our account does not have access to this model
                 continue
             print("making request", model)
@@ -381,12 +384,15 @@ def test_vertex_ai_stream():
                 "code-gecko@001",
                 "code-gecko@002",
                 "code-gecko@latest",
+                "codechat-bison@latest",
                 "code-bison@001",
                 "text-bison@001",
                 "gemini-1.5-pro",
                 "gemini-1.5-pro-preview-0215",
-            ]:
-                # ouraccount does not have access to this model
+            ] or (
+                "gecko" in model or "32k" in model or "ultra" in model or "002" in model
+            ):
+                # our account does not have access to this model
                 continue
             print("making request", model)
             response = completion(
@@ -433,11 +439,12 @@ async def test_async_vertexai_response():
             "code-gecko@001",
             "code-gecko@002",
             "code-gecko@latest",
+            "codechat-bison@latest",
             "code-bison@001",
             "text-bison@001",
             "gemini-1.5-pro",
             "gemini-1.5-pro-preview-0215",
-        ]:
+        ] or ("gecko" in model or "32k" in model or "ultra" in model or "002" in model):
             # our account does not have access to this model
             continue
         try:
@@ -479,11 +486,12 @@ async def test_async_vertexai_streaming_response():
             "code-gecko@001",
             "code-gecko@002",
             "code-gecko@latest",
+            "codechat-bison@latest",
             "code-bison@001",
             "text-bison@001",
             "gemini-1.5-pro",
             "gemini-1.5-pro-preview-0215",
-        ]:
+        ] or ("gecko" in model or "32k" in model or "ultra" in model or "002" in model):
             # our account does not have access to this model
             continue
         try:

From 7f0c77624652f53f5b72c49616d748e8e22b4fcc Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 19:00:30 -0700
Subject: [PATCH 130/150] fix gemini test

---
 litellm/llms/vertex_httpx.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py
index 856b05f61..bf650aa4a 100644
--- a/litellm/llms/vertex_httpx.py
+++ b/litellm/llms/vertex_httpx.py
@@ -183,10 +183,17 @@ class GoogleAIStudioGeminiConfig:  # key diff from VertexAI - 'frequency_penalty
             if param == "tools" and isinstance(value, list):
                 gtool_func_declarations = []
                 for tool in value:
+                    _parameters = tool.get("function", {}).get("parameters", {})
+                    _properties = _parameters.get("properties", {})
+                    if isinstance(_properties, dict):
+                        for _, _property in _properties.items():
+                            if "enum" in _property and "format" not in _property:
+                                _property["format"] = "enum"
+
                     gtool_func_declaration = FunctionDeclaration(
                         name=tool["function"]["name"],
                         description=tool["function"].get("description", ""),
-                        parameters=tool["function"].get("parameters", {}),
+                        parameters=_parameters,
                     )
                     gtool_func_declarations.append(gtool_func_declaration)
                 optional_params["tools"] = [

From b29b3fb3f40c527533042d51da4ec20c53ab9228 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 19:03:17 -0700
Subject: [PATCH 131/150] =?UTF-8?q?bump:=20version=201.40.27=20=E2=86=92?=
 =?UTF-8?q?=201.40.28?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 321f44b23..4c7192acf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.40.27"
+version = "1.40.28"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -90,7 +90,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.40.27"
+version = "1.40.28"
 version_files = [
     "pyproject.toml:^version"
 ]

From c4f68851c00b3673b6887a1652ef0ecbe8f9a35c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Wed, 26 Jun 2024 19:18:12 -0700
Subject: [PATCH 132/150] ci/cd run again

---
 litellm/tests/test_completion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 2ceb11a79..5138e9b61 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -11,7 +11,7 @@ import os
 
 sys.path.insert(
     0, os.path.abspath("../..")
-)  # Adds the parent directory to the system path
+)  # Adds-the parent directory to the system path
 
 import os
 from unittest.mock import MagicMock, patch

From 8142891a5c492a9af74de508f48d3d75bc5418fe Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 26 Jun 2024 22:45:29 -0700
Subject: [PATCH 133/150] docs(openai_compatible.md): doc on disabling system
 messages

---
 .../docs/providers/openai_compatible.md           | 15 +++++++++++++++
 docs/my-website/docs/proxy/configs.md             |  2 +-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/docs/my-website/docs/providers/openai_compatible.md b/docs/my-website/docs/providers/openai_compatible.md
index ff0e85709..f02149024 100644
--- a/docs/my-website/docs/providers/openai_compatible.md
+++ b/docs/my-website/docs/providers/openai_compatible.md
@@ -115,3 +115,18 @@ Here's how to call an OpenAI-Compatible Endpoint with the LiteLLM Proxy Server
   </TabItem>
 
   </Tabs>
+
+
+### Advanced - Disable System Messages
+
+Some VLLM models (e.g. gemma) don't support system messages. To map those requests to 'user' messages, use the `supports_system_message` flag. 
+
+```yaml
+model_list:
+- model_name: my-custom-model
+   litellm_params:
+      model: openai/google/gemma
+      api_base: http://my-custom-base
+      api_key: "" 
+      supports_system_message: False # 👈 KEY CHANGE
+```
\ No newline at end of file
diff --git a/docs/my-website/docs/proxy/configs.md b/docs/my-website/docs/proxy/configs.md
index 9381a14a4..80235586c 100644
--- a/docs/my-website/docs/proxy/configs.md
+++ b/docs/my-website/docs/proxy/configs.md
@@ -427,7 +427,7 @@ model_list:
 
 ```shell
 $ litellm --config /path/to/config.yaml
-```
+``` 
 
 ## Setting Embedding Models 
 

From 6efe2477087d80c312bdcf5f7457554c7877efef Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 26 Jun 2024 22:52:50 -0700
Subject: [PATCH 134/150] fix(utils.py): add new special token for cleanup

---
 litellm/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/litellm/utils.py b/litellm/utils.py
index 515918822..dbc988bb9 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -7805,6 +7805,7 @@ class CustomStreamWrapper:
             "<s>",
             "</s>",
             "<|im_end|>",
+            "<|im_start|>",
         ]
         self.holding_chunk = ""
         self.complete_response = ""

From 345e0dfa8fba83d0b6d4a4670528eba333120d37 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Thu, 27 Jun 2024 08:56:52 -0700
Subject: [PATCH 135/150] fix(utils.py): handle arguments being None

Fixes https://github.com/BerriAI/litellm/issues/4440
---
 litellm/types/utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index f2b161128..a63e34738 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -168,11 +168,13 @@ class Function(OpenAIObject):
 
     def __init__(
         self,
-        arguments: Union[Dict, str],
+        arguments: Optional[Union[Dict, str]],
         name: Optional[str] = None,
         **params,
     ):
-        if isinstance(arguments, Dict):
+        if arguments is None:
+            arguments = ""
+        elif isinstance(arguments, Dict):
             arguments = json.dumps(arguments)
         else:
             arguments = arguments

From 77b78f6630c997727fcf340ac0fb6d2e4c2ccefe Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Thu, 27 Jun 2024 08:58:25 -0700
Subject: [PATCH 136/150] =?UTF-8?q?bump:=20version=201.40.28=20=E2=86=92?=
 =?UTF-8?q?=201.40.29?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4c7192acf..6a620d650 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.40.28"
+version = "1.40.29"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -90,7 +90,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.40.28"
+version = "1.40.29"
 version_files = [
     "pyproject.toml:^version"
 ]

From a975486df01d9df31676e6cae08ea84f730d56aa Mon Sep 17 00:00:00 2001
From: Daniel Liden <djliden91@gmail.com>
Date: Thu, 27 Jun 2024 09:11:09 -0400
Subject: [PATCH 137/150] Update databricks.md

updates some references to predibase to refer to Databricks
---
 docs/my-website/docs/providers/databricks.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/my-website/docs/providers/databricks.md b/docs/my-website/docs/providers/databricks.md
index 24c7c40cf..fcc1d4813 100644
--- a/docs/my-website/docs/providers/databricks.md
+++ b/docs/my-website/docs/providers/databricks.md
@@ -27,7 +27,7 @@ import os
 os.environ["DATABRICKS_API_KEY"] = "databricks key"
 os.environ["DATABRICKS_API_BASE"] = "databricks base url" # e.g.: https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints
 
-# predibase llama-3 call
+# Databricks dbrx-instruct call
 response = completion(
     model="databricks/databricks-dbrx-instruct", 
     messages = [{ "content": "Hello, how are you?","role": "user"}]
@@ -143,8 +143,8 @@ response = completion(
   model_list:
     - model_name: llama-3
       litellm_params:
-        model: predibase/llama-3-8b-instruct
-        api_key: os.environ/PREDIBASE_API_KEY
+        model: databricks/databricks-dbrx-instruct
+        api_key: os.environ/DATABRICKS_API_KEY
         max_tokens: 20
         temperature: 0.5
 ```
@@ -162,7 +162,7 @@ import os
 os.environ["DATABRICKS_API_KEY"] = "databricks key"
 os.environ["DATABRICKS_API_BASE"] = "databricks url"
 
-# predibase llama3 call
+# Databricks bge-large-en call
 response = litellm.embedding(
       model="databricks/databricks-bge-large-en",
       input=["good morning from litellm"],

From 50bafd7af6bf686d551aa0c0c029c0e48bf5c375 Mon Sep 17 00:00:00 2001
From: Daniel Liden <djliden91@gmail.com>
Date: Thu, 27 Jun 2024 09:36:45 -0400
Subject: [PATCH 138/150] Update databricks.md

fixes a couple of examples to use correct endpoints/point to correct models
---
 docs/my-website/docs/providers/databricks.md | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/docs/my-website/docs/providers/databricks.md b/docs/my-website/docs/providers/databricks.md
index fcc1d4813..c81b0174a 100644
--- a/docs/my-website/docs/providers/databricks.md
+++ b/docs/my-website/docs/providers/databricks.md
@@ -143,13 +143,13 @@ response = completion(
   model_list:
     - model_name: llama-3
       litellm_params:
-        model: databricks/databricks-dbrx-instruct
+        model: databricks/databricks-meta-llama-3-70b-instruct
         api_key: os.environ/DATABRICKS_API_KEY
         max_tokens: 20
         temperature: 0.5
 ```
 
-## Passings Database specific params - 'instruction'
+## Passings Databricks specific params - 'instruction'
 
 For embedding models, databricks lets you pass in an additional param 'instruction'. [Full Spec](https://github.com/BerriAI/litellm/blob/43353c28b341df0d9992b45c6ce464222ebd7984/litellm/llms/databricks.py#L164)
 
@@ -177,14 +177,13 @@ response = litellm.embedding(
     - model_name: bge-large
       litellm_params:
         model: databricks/databricks-bge-large-en
-        api_key: os.environ/DATABRICKS_API_KEY
-        api_base: os.environ/DATABRICKS_API_BASE
+        api_key: ${DATABRICKS_API_KEY}
+        api_base: ${DATABRICKS_API_BASE}
         instruction: "Represent this sentence for searching relevant passages:"
 ```
 
 
 ## Supported Databricks Chat Completion Models 
-Here's an example of using a Databricks models with LiteLLM
 
 | Model Name                 | Command                                                          |
 |----------------------------|------------------------------------------------------------------|
@@ -196,8 +195,8 @@ Here's an example of using a Databricks models with LiteLLM
 | databricks-mpt-7b-instruct    | `completion(model='databricks/databricks-mpt-7b-instruct', messages=messages)`   | 
 
 ## Supported Databricks Embedding Models 
-Here's an example of using a databricks models with LiteLLM
 
 | Model Name                 | Command                                                          |
 |----------------------------|------------------------------------------------------------------|
-| databricks-bge-large-en    | `completion(model='databricks/databricks-bge-large-en', messages=messages)`   | 
+| databricks-bge-large-en    | `embedding(model='databricks/databricks-bge-large-en', messages=messages)`   |
+| databricks-gte-large-en    | `embedding(model='databricks/databricks-gte-large-en', messages=messages)`   |

From 01a6077ca50b1d6f66142a33f5390dae5ece9244 Mon Sep 17 00:00:00 2001
From: Daniel Liden <djliden91@gmail.com>
Date: Thu, 27 Jun 2024 12:51:00 -0400
Subject: [PATCH 139/150] undoes changes to proxy yaml api key/base

---
 docs/my-website/docs/providers/databricks.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/my-website/docs/providers/databricks.md b/docs/my-website/docs/providers/databricks.md
index c81b0174a..633350d22 100644
--- a/docs/my-website/docs/providers/databricks.md
+++ b/docs/my-website/docs/providers/databricks.md
@@ -177,8 +177,8 @@ response = litellm.embedding(
     - model_name: bge-large
       litellm_params:
         model: databricks/databricks-bge-large-en
-        api_key: ${DATABRICKS_API_KEY}
-        api_base: ${DATABRICKS_API_BASE}
+        api_key: os.environ/DATABRICKS_API_KEY
+        api_base: os.environ/DATABRICKS_API_BASE
         instruction: "Represent this sentence for searching relevant passages:"
 ```
 

From b95dd09a3fb4e9b077969fa43f7f143740586ede Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Thu, 27 Jun 2024 10:40:03 -0700
Subject: [PATCH 140/150] docs - fix model name on claude-3-5-sonnet-20240620
 anthropic

---
 docs/my-website/docs/providers/anthropic.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/my-website/docs/providers/anthropic.md b/docs/my-website/docs/providers/anthropic.md
index 3b9e67969..e7d3352f9 100644
--- a/docs/my-website/docs/providers/anthropic.md
+++ b/docs/my-website/docs/providers/anthropic.md
@@ -172,7 +172,7 @@ print(response)
 |------------------|--------------------------------------------|
 | claude-3-haiku  | `completion('claude-3-haiku-20240307', messages)` | `os.environ['ANTHROPIC_API_KEY']`       |
 | claude-3-opus  | `completion('claude-3-opus-20240229', messages)` | `os.environ['ANTHROPIC_API_KEY']`       |
-| claude-3-5-sonnet  | `completion('claude-3-5-sonnet-20240620', messages)` | `os.environ['ANTHROPIC_API_KEY']`       |
+| claude-3-5-sonnet-20240620  | `completion('claude-3-5-sonnet-20240620', messages)` | `os.environ['ANTHROPIC_API_KEY']`       |
 | claude-3-sonnet  | `completion('claude-3-sonnet-20240229', messages)` | `os.environ['ANTHROPIC_API_KEY']`       |
 | claude-2.1  | `completion('claude-2.1', messages)` | `os.environ['ANTHROPIC_API_KEY']`       |
 | claude-2  | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']`       |

From 28109faaffd909f0f1adc7360de3f996de144d74 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Thu, 27 Jun 2024 12:02:19 -0700
Subject: [PATCH 141/150] fix raise better error message on reaching failed
 vertex import

---
 litellm/llms/vertex_ai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py
index 1dbd93048..4a4abaef4 100644
--- a/litellm/llms/vertex_ai.py
+++ b/litellm/llms/vertex_ai.py
@@ -437,7 +437,7 @@ def completion(
     except:
         raise VertexAIError(
             status_code=400,
-            message="vertexai import failed please run `pip install google-cloud-aiplatform`",
+            message="vertexai import failed please run `pip install google-cloud-aiplatform`. This is required for the 'vertex_ai/' route on LiteLLM",
         )
 
     if not (

From 9b98269f59558a37681a1a70d1face8d16e44c46 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Thu, 27 Jun 2024 13:19:54 -0700
Subject: [PATCH 142/150] fix secret redaction logic

---
 litellm/proxy/proxy_server.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index c3b855c5f..b9972a723 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -2954,6 +2954,11 @@ async def chat_completion(
         if isinstance(data["model"], str) and data["model"] in litellm.model_alias_map:
             data["model"] = litellm.model_alias_map[data["model"]]
 
+        ### CALL HOOKS ### - modify/reject incoming data before calling the model
+        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
+            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
+        )
+
         ## LOGGING OBJECT ## - initialize logging object for logging success/failure events for call
         data["litellm_call_id"] = str(uuid.uuid4())
         logging_obj, data = litellm.utils.function_setup(
@@ -2965,11 +2970,6 @@ async def chat_completion(
 
         data["litellm_logging_obj"] = logging_obj
 
-        ### CALL HOOKS ### - modify/reject incoming data before calling the model
-        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
-            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
-        )
-
         tasks = []
         tasks.append(
             proxy_logging_obj.during_call_hook(

From 758304f5c52eedf05ee3a3ebb2301f6a76a46a70 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Thu, 27 Jun 2024 13:48:25 -0700
Subject: [PATCH 143/150] test - test_chat_completion_request_with_redaction

---
 litellm/tests/test_secret_detect_hook.py | 84 ++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/litellm/tests/test_secret_detect_hook.py b/litellm/tests/test_secret_detect_hook.py
index a1bf10eba..cb1e01810 100644
--- a/litellm/tests/test_secret_detect_hook.py
+++ b/litellm/tests/test_secret_detect_hook.py
@@ -21,15 +21,20 @@ sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
 import pytest
+from fastapi import Request, Response
+from starlette.datastructures import URL
 
 import litellm
 from litellm import Router, mock_completion
 from litellm.caching import DualCache
+from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (
     _ENTERPRISE_SecretDetection,
 )
+from litellm.proxy.proxy_server import chat_completion
 from litellm.proxy.utils import ProxyLogging, hash_token
+from litellm.router import Router
 
 ### UNIT TESTS FOR OpenAI Moderation ###
 
@@ -214,3 +219,82 @@ async def test_basic_secret_detection_embeddings_list():
         ],
         "model": "gpt-3.5-turbo",
     }
+
+
+class testLogger(CustomLogger):
+
+    def __init__(self):
+        self.logged_message = None
+
+    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
+        print(f"On Async Success")
+
+        self.logged_message = kwargs.get("messages")
+
+
+router = Router(
+    model_list=[
+        {
+            "model_name": "fake-model",
+            "litellm_params": {
+                "model": "openai/fake",
+                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                "api_key": "sk-12345",
+            },
+        }
+    ]
+)
+
+
+@pytest.mark.asyncio
+async def test_chat_completion_request_with_redaction():
+    """
+    IMPORTANT Enterprise Test - Do not delete it:
+    Makes a /chat/completions request on LiteLLM Proxy
+
+    Ensures that the secret is redacted EVEN on the callback
+    """
+    from litellm.proxy import proxy_server
+
+    setattr(proxy_server, "llm_router", router)
+    _test_logger = testLogger()
+    litellm.callbacks = [_ENTERPRISE_SecretDetection(), _test_logger]
+    litellm.set_verbose = True
+
+    # Prepare the query string
+    query_params = "param1=value1&param2=value2"
+
+    # Create the Request object with query parameters
+    request = Request(
+        scope={
+            "type": "http",
+            "method": "POST",
+            "headers": [(b"content-type", b"application/json")],
+            "query_string": query_params.encode(),
+        }
+    )
+
+    request._url = URL(url="/chat/completions")
+
+    async def return_body():
+        return b'{"model": "fake-model", "messages": [{"role": "user", "content": "Hello here is my OPENAI_API_KEY = sk-12345"}]}'
+
+    request.body = return_body
+
+    response = await chat_completion(
+        request=request,
+        user_api_key_dict=UserAPIKeyAuth(
+            api_key="sk-12345",
+            token="hashed_sk-12345",
+        ),
+        fastapi_response=Response(),
+    )
+
+    await asyncio.sleep(3)
+
+    print("Info in callback after running request=", _test_logger.logged_message)
+
+    assert _test_logger.logged_message == [
+        {"role": "user", "content": "Hello here is my OPENAI_API_KEY = [REDACTED]"}
+    ]
+    pass

From 32bd8c0b0e5f00579d68cb9c630c5ab34d2708c8 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Thu, 27 Jun 2024 15:07:38 -0700
Subject: [PATCH 144/150] feat - improve secret detection

---
 .../enterprise_hooks/secret_detection.py      | 411 +++++++++++++++++-
 1 file changed, 409 insertions(+), 2 deletions(-)

diff --git a/enterprise/enterprise_hooks/secret_detection.py b/enterprise/enterprise_hooks/secret_detection.py
index ded9f27c1..23dd2a7e0 100644
--- a/enterprise/enterprise_hooks/secret_detection.py
+++ b/enterprise/enterprise_hooks/secret_detection.py
@@ -33,27 +33,433 @@ from litellm._logging import verbose_proxy_logger
 litellm.set_verbose = True
 
 
+_custom_plugins_path = "file://" + os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), "secrets_plugins"
+)
+print("custom plugins path", _custom_plugins_path)
+_default_detect_secrets_config = {
+    "plugins_used": [
+        {"name": "SoftlayerDetector"},
+        {"name": "StripeDetector"},
+        {"name": "NpmDetector"},
+        {"name": "IbmCosHmacDetector"},
+        {"name": "DiscordBotTokenDetector"},
+        {"name": "BasicAuthDetector"},
+        {"name": "AzureStorageKeyDetector"},
+        {"name": "ArtifactoryDetector"},
+        {"name": "AWSKeyDetector"},
+        {"name": "CloudantDetector"},
+        {"name": "IbmCloudIamDetector"},
+        {"name": "JwtTokenDetector"},
+        {"name": "MailchimpDetector"},
+        {"name": "SquareOAuthDetector"},
+        {"name": "PrivateKeyDetector"},
+        {"name": "TwilioKeyDetector"},
+        {
+            "name": "AdafruitKeyDetector",
+            "path": _custom_plugins_path + "/adafruit.py",
+        },
+        {
+            "name": "AdobeSecretDetector",
+            "path": _custom_plugins_path + "/adobe.py",
+        },
+        {
+            "name": "AgeSecretKeyDetector",
+            "path": _custom_plugins_path + "/age_secret_key.py",
+        },
+        {
+            "name": "AirtableApiKeyDetector",
+            "path": _custom_plugins_path + "/airtable_api_key.py",
+        },
+        {
+            "name": "AlgoliaApiKeyDetector",
+            "path": _custom_plugins_path + "/algolia_api_key.py",
+        },
+        {
+            "name": "AlibabaSecretDetector",
+            "path": _custom_plugins_path + "/alibaba.py",
+        },
+        {
+            "name": "AsanaSecretDetector",
+            "path": _custom_plugins_path + "/asana.py",
+        },
+        {
+            "name": "AtlassianApiTokenDetector",
+            "path": _custom_plugins_path + "/atlassian_api_token.py",
+        },
+        {
+            "name": "AuthressAccessKeyDetector",
+            "path": _custom_plugins_path + "/authress_access_key.py",
+        },
+        {
+            "name": "BittrexDetector",
+            "path": _custom_plugins_path + "/beamer_api_token.py",
+        },
+        {
+            "name": "BitbucketDetector",
+            "path": _custom_plugins_path + "/bitbucket.py",
+        },
+        {
+            "name": "BeamerApiTokenDetector",
+            "path": _custom_plugins_path + "/bittrex.py",
+        },
+        {
+            "name": "ClojarsApiTokenDetector",
+            "path": _custom_plugins_path + "/clojars_api_token.py",
+        },
+        {
+            "name": "CodecovAccessTokenDetector",
+            "path": _custom_plugins_path + "/codecov_access_token.py",
+        },
+        {
+            "name": "CoinbaseAccessTokenDetector",
+            "path": _custom_plugins_path + "/coinbase_access_token.py",
+        },
+        {
+            "name": "ConfluentDetector",
+            "path": _custom_plugins_path + "/confluent.py",
+        },
+        {
+            "name": "ContentfulApiTokenDetector",
+            "path": _custom_plugins_path + "/contentful_api_token.py",
+        },
+        {
+            "name": "DatabricksApiTokenDetector",
+            "path": _custom_plugins_path + "/databricks_api_token.py",
+        },
+        {
+            "name": "DatadogAccessTokenDetector",
+            "path": _custom_plugins_path + "/datadog_access_token.py",
+        },
+        {
+            "name": "DefinedNetworkingApiTokenDetector",
+            "path": _custom_plugins_path + "/defined_networking_api_token.py",
+        },
+        {
+            "name": "DigitaloceanDetector",
+            "path": _custom_plugins_path + "/digitalocean.py",
+        },
+        {
+            "name": "DopplerApiTokenDetector",
+            "path": _custom_plugins_path + "/doppler_api_token.py",
+        },
+        {
+            "name": "DroneciAccessTokenDetector",
+            "path": _custom_plugins_path + "/droneci_access_token.py",
+        },
+        {
+            "name": "DuffelApiTokenDetector",
+            "path": _custom_plugins_path + "/duffel_api_token.py",
+        },
+        {
+            "name": "DynatraceApiTokenDetector",
+            "path": _custom_plugins_path + "/dynatrace_api_token.py",
+        },
+        {
+            "name": "DiscordDetector",
+            "path": _custom_plugins_path + "/discord.py",
+        },
+        {
+            "name": "DropboxDetector",
+            "path": _custom_plugins_path + "/dropbox.py",
+        },
+        {
+            "name": "EasyPostDetector",
+            "path": _custom_plugins_path + "/easypost.py",
+        },
+        {
+            "name": "EtsyAccessTokenDetector",
+            "path": _custom_plugins_path + "/etsy_access_token.py",
+        },
+        {
+            "name": "FacebookAccessTokenDetector",
+            "path": _custom_plugins_path + "/facebook_access_token.py",
+        },
+        {
+            "name": "FastlyApiKeyDetector",
+            "path": _custom_plugins_path + "/fastly_api_token.py",
+        },
+        {
+            "name": "FinicityDetector",
+            "path": _custom_plugins_path + "/finicity.py",
+        },
+        {
+            "name": "FinnhubAccessTokenDetector",
+            "path": _custom_plugins_path + "/finnhub_access_token.py",
+        },
+        {
+            "name": "FlickrAccessTokenDetector",
+            "path": _custom_plugins_path + "/flickr_access_token.py",
+        },
+        {
+            "name": "FlutterwaveDetector",
+            "path": _custom_plugins_path + "/flutterwave.py",
+        },
+        {
+            "name": "FrameIoApiTokenDetector",
+            "path": _custom_plugins_path + "/frameio_api_token.py",
+        },
+        {
+            "name": "FreshbooksAccessTokenDetector",
+            "path": _custom_plugins_path + "/freshbooks_access_token.py",
+        },
+        {
+            "name": "GCPApiKeyDetector",
+            "path": _custom_plugins_path + "/gcp_api_key.py",
+        },
+        {
+            "name": "GitHubTokenCustomDetector",
+            "path": _custom_plugins_path + "/github_token.py",
+        },
+        {
+            "name": "GitLabDetector",
+            "path": _custom_plugins_path + "/gitlab.py",
+        },
+        {
+            "name": "GitterAccessTokenDetector",
+            "path": _custom_plugins_path + "/gitter_access_token.py",
+        },
+        {
+            "name": "GoCardlessApiTokenDetector",
+            "path": _custom_plugins_path + "/gocardless_api_token.py",
+        },
+        {
+            "name": "GrafanaDetector",
+            "path": _custom_plugins_path + "/grafana.py",
+        },
+        {
+            "name": "HashiCorpTFApiTokenDetector",
+            "path": _custom_plugins_path + "/hashicorp_tf_api_token.py",
+        },
+        {
+            "name": "HerokuApiKeyDetector",
+            "path": _custom_plugins_path + "/heroku_api_key.py",
+        },
+        {
+            "name": "HubSpotApiTokenDetector",
+            "path": _custom_plugins_path + "/hubspot_api_key.py",
+        },
+        {
+            "name": "HuggingFaceDetector",
+            "path": _custom_plugins_path + "/huggingface.py",
+        },
+        {
+            "name": "IntercomApiTokenDetector",
+            "path": _custom_plugins_path + "/intercom_api_key.py",
+        },
+        {
+            "name": "JFrogDetector",
+            "path": _custom_plugins_path + "/jfrog.py",
+        },
+        {
+            "name": "JWTBase64Detector",
+            "path": _custom_plugins_path + "/jwt.py",
+        },
+        {
+            "name": "KrakenAccessTokenDetector",
+            "path": _custom_plugins_path + "/kraken_access_token.py",
+        },
+        {
+            "name": "KucoinDetector",
+            "path": _custom_plugins_path + "/kucoin.py",
+        },
+        {
+            "name": "LaunchdarklyAccessTokenDetector",
+            "path": _custom_plugins_path + "/launchdarkly_access_token.py",
+        },
+        {
+            "name": "LinearDetector",
+            "path": _custom_plugins_path + "/linear.py",
+        },
+        {
+            "name": "LinkedInDetector",
+            "path": _custom_plugins_path + "/linkedin.py",
+        },
+        {
+            "name": "LobDetector",
+            "path": _custom_plugins_path + "/lob.py",
+        },
+        {
+            "name": "MailgunDetector",
+            "path": _custom_plugins_path + "/mailgun.py",
+        },
+        {
+            "name": "MapBoxApiTokenDetector",
+            "path": _custom_plugins_path + "/mapbox_api_token.py",
+        },
+        {
+            "name": "MattermostAccessTokenDetector",
+            "path": _custom_plugins_path + "/mattermost_access_token.py",
+        },
+        {
+            "name": "MessageBirdDetector",
+            "path": _custom_plugins_path + "/messagebird.py",
+        },
+        {
+            "name": "MicrosoftTeamsWebhookDetector",
+            "path": _custom_plugins_path + "/microsoft_teams_webhook.py",
+        },
+        {
+            "name": "NetlifyAccessTokenDetector",
+            "path": _custom_plugins_path + "/netlify_access_token.py",
+        },
+        {
+            "name": "NewRelicDetector",
+            "path": _custom_plugins_path + "/new_relic.py",
+        },
+        {
+            "name": "NYTimesAccessTokenDetector",
+            "path": _custom_plugins_path + "/nytimes_access_token.py",
+        },
+        {
+            "name": "OktaAccessTokenDetector",
+            "path": _custom_plugins_path + "/okta_access_token.py",
+        },
+        {
+            "name": "OpenAIApiKeyDetector",
+            "path": _custom_plugins_path + "/openai_api_key.py",
+        },
+        {
+            "name": "PlanetScaleDetector",
+            "path": _custom_plugins_path + "/planetscale.py",
+        },
+        {
+            "name": "PostmanApiTokenDetector",
+            "path": _custom_plugins_path + "/postman_api_token.py",
+        },
+        {
+            "name": "PrefectApiTokenDetector",
+            "path": _custom_plugins_path + "/prefect_api_token.py",
+        },
+        {
+            "name": "PulumiApiTokenDetector",
+            "path": _custom_plugins_path + "/pulumi_api_token.py",
+        },
+        {
+            "name": "PyPiUploadTokenDetector",
+            "path": _custom_plugins_path + "/pypi_upload_token.py",
+        },
+        {
+            "name": "RapidApiAccessTokenDetector",
+            "path": _custom_plugins_path + "/rapidapi_access_token.py",
+        },
+        {
+            "name": "ReadmeApiTokenDetector",
+            "path": _custom_plugins_path + "/readme_api_token.py",
+        },
+        {
+            "name": "RubygemsApiTokenDetector",
+            "path": _custom_plugins_path + "/rubygems_api_token.py",
+        },
+        {
+            "name": "ScalingoApiTokenDetector",
+            "path": _custom_plugins_path + "/scalingo_api_token.py",
+        },
+        {
+            "name": "SendbirdDetector",
+            "path": _custom_plugins_path + "/sendbird.py",
+        },
+        {
+            "name": "SendGridApiTokenDetector",
+            "path": _custom_plugins_path + "/sendgrid_api_token.py",
+        },
+        {
+            "name": "SendinBlueApiTokenDetector",
+            "path": _custom_plugins_path + "/sendinblue_api_token.py",
+        },
+        {
+            "name": "SentryAccessTokenDetector",
+            "path": _custom_plugins_path + "/sentry_access_token.py",
+        },
+        {
+            "name": "ShippoApiTokenDetector",
+            "path": _custom_plugins_path + "/shippo_api_token.py",
+        },
+        {
+            "name": "ShopifyDetector",
+            "path": _custom_plugins_path + "/shopify.py",
+        },
+        {
+            "name": "SidekiqDetector",
+            "path": _custom_plugins_path + "/sidekiq.py",
+        },
+        {
+            "name": "SlackDetector",
+            "path": _custom_plugins_path + "/slack.py",
+        },
+        {
+            "name": "SnykApiTokenDetector",
+            "path": _custom_plugins_path + "/snyk_api_token.py",
+        },
+        {
+            "name": "SquarespaceAccessTokenDetector",
+            "path": _custom_plugins_path + "/squarespace_access_token.py",
+        },
+        {
+            "name": "SumoLogicDetector",
+            "path": _custom_plugins_path + "/sumologic.py",
+        },
+        {
+            "name": "TelegramBotApiTokenDetector",
+            "path": _custom_plugins_path + "/telegram_bot_api_token.py",
+        },
+        {
+            "name": "TravisCiAccessTokenDetector",
+            "path": _custom_plugins_path + "/travisci_access_token.py",
+        },
+        {
+            "name": "TwitchApiTokenDetector",
+            "path": _custom_plugins_path + "/twitch_api_token.py",
+        },
+        {
+            "name": "TwitterDetector",
+            "path": _custom_plugins_path + "/twitter.py",
+        },
+        {
+            "name": "TypeformApiTokenDetector",
+            "path": _custom_plugins_path + "/typeform_api_token.py",
+        },
+        {
+            "name": "VaultDetector",
+            "path": _custom_plugins_path + "/vault.py",
+        },
+        {
+            "name": "YandexDetector",
+            "path": _custom_plugins_path + "/yandex.py",
+        },
+        {
+            "name": "ZendeskSecretKeyDetector",
+            "path": _custom_plugins_path + "/zendesk_secret_key.py",
+        },
+        {"name": "Base64HighEntropyString", "limit": 3.0},
+        {"name": "HexHighEntropyString", "limit": 3.0},
+    ]
+}
+
+
 class _ENTERPRISE_SecretDetection(CustomLogger):
     def __init__(self):
         pass
 
     def scan_message_for_secrets(self, message_content: str):
         from detect_secrets import SecretsCollection
-        from detect_secrets.settings import default_settings
+        from detect_secrets.settings import transient_settings
 
         temp_file = tempfile.NamedTemporaryFile(delete=False)
         temp_file.write(message_content.encode("utf-8"))
         temp_file.close()
 
         secrets = SecretsCollection()
-        with default_settings():
+        with transient_settings(_default_detect_secrets_config):
             secrets.scan_file(temp_file.name)
 
         os.remove(temp_file.name)
 
         detected_secrets = []
         for file in secrets.files:
+
             for found_secret in secrets[file]:
+
                 if found_secret.secret_value is None:
                     continue
                 detected_secrets.append(
@@ -76,6 +482,7 @@ class _ENTERPRISE_SecretDetection(CustomLogger):
         if "messages" in data and isinstance(data["messages"], list):
             for message in data["messages"]:
                 if "content" in message and isinstance(message["content"], str):
+
                     detected_secrets = self.scan_message_for_secrets(message["content"])
 
                     for secret in detected_secrets:

From 1ed2b008f1ed378f4cf69d09cd8f02a964fe1488 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Thu, 27 Jun 2024 15:12:13 -0700
Subject: [PATCH 145/150] add stricter secret detection

---
 .../secrets_plugins/__init__.py               |  0
 .../secrets_plugins/adafruit.py               | 23 +++++++++++
 .../enterprise_hooks/secrets_plugins/adobe.py | 26 +++++++++++++
 .../secrets_plugins/age_secret_key.py         | 21 ++++++++++
 .../secrets_plugins/airtable_api_key.py       | 23 +++++++++++
 .../secrets_plugins/algolia_api_key.py        | 21 ++++++++++
 .../secrets_plugins/alibaba.py                | 26 +++++++++++++
 .../enterprise_hooks/secrets_plugins/asana.py | 28 ++++++++++++++
 .../secrets_plugins/atlassian_api_token.py    | 24 ++++++++++++
 .../secrets_plugins/authress_access_key.py    | 24 ++++++++++++
 .../secrets_plugins/beamer_api_token.py       | 24 ++++++++++++
 .../secrets_plugins/bitbucket.py              | 28 ++++++++++++++
 .../secrets_plugins/bittrex.py                | 28 ++++++++++++++
 .../secrets_plugins/clojars_api_token.py      | 22 +++++++++++
 .../secrets_plugins/codecov_access_token.py   | 24 ++++++++++++
 .../secrets_plugins/coinbase_access_token.py  | 24 ++++++++++++
 .../secrets_plugins/confluent.py              | 28 ++++++++++++++
 .../secrets_plugins/contentful_api_token.py   | 23 +++++++++++
 .../secrets_plugins/databricks_api_token.py   | 21 ++++++++++
 .../secrets_plugins/datadog_access_token.py   | 23 +++++++++++
 .../defined_networking_api_token.py           | 23 +++++++++++
 .../secrets_plugins/digitalocean.py           | 26 +++++++++++++
 .../secrets_plugins/discord.py                | 32 ++++++++++++++++
 .../secrets_plugins/doppler_api_token.py      | 22 +++++++++++
 .../secrets_plugins/droneci_access_token.py   | 24 ++++++++++++
 .../secrets_plugins/dropbox.py                | 32 ++++++++++++++++
 .../secrets_plugins/duffel_api_token.py       | 22 +++++++++++
 .../secrets_plugins/dynatrace_api_token.py    | 22 +++++++++++
 .../secrets_plugins/easypost.py               | 24 ++++++++++++
 .../secrets_plugins/etsy_access_token.py      | 24 ++++++++++++
 .../secrets_plugins/facebook_access_token.py  | 24 ++++++++++++
 .../secrets_plugins/fastly_api_token.py       | 24 ++++++++++++
 .../secrets_plugins/finicity.py               | 28 ++++++++++++++
 .../secrets_plugins/finnhub_access_token.py   | 24 ++++++++++++
 .../secrets_plugins/flickr_access_token.py    | 24 ++++++++++++
 .../secrets_plugins/flutterwave.py            | 26 +++++++++++++
 .../secrets_plugins/frameio_api_token.py      | 22 +++++++++++
 .../freshbooks_access_token.py                | 24 ++++++++++++
 .../secrets_plugins/gcp_api_key.py            | 24 ++++++++++++
 .../secrets_plugins/github_token.py           | 26 +++++++++++++
 .../secrets_plugins/gitlab.py                 | 26 +++++++++++++
 .../secrets_plugins/gitter_access_token.py    | 24 ++++++++++++
 .../secrets_plugins/gocardless_api_token.py   | 25 ++++++++++++
 .../secrets_plugins/grafana.py                | 32 ++++++++++++++++
 .../secrets_plugins/hashicorp_tf_api_token.py | 22 +++++++++++
 .../secrets_plugins/heroku_api_key.py         | 23 +++++++++++
 .../secrets_plugins/hubspot_api_key.py        | 24 ++++++++++++
 .../secrets_plugins/huggingface.py            | 26 +++++++++++++
 .../secrets_plugins/intercom_api_key.py       | 23 +++++++++++
 .../enterprise_hooks/secrets_plugins/jfrog.py | 28 ++++++++++++++
 .../enterprise_hooks/secrets_plugins/jwt.py   | 24 ++++++++++++
 .../secrets_plugins/kraken_access_token.py    | 24 ++++++++++++
 .../secrets_plugins/kucoin.py                 | 28 ++++++++++++++
 .../launchdarkly_access_token.py              | 23 +++++++++++
 .../secrets_plugins/linear.py                 | 26 +++++++++++++
 .../secrets_plugins/linkedin.py               | 28 ++++++++++++++
 .../enterprise_hooks/secrets_plugins/lob.py   | 28 ++++++++++++++
 .../secrets_plugins/mailgun.py                | 32 ++++++++++++++++
 .../secrets_plugins/mapbox_api_token.py       | 24 ++++++++++++
 .../mattermost_access_token.py                | 24 ++++++++++++
 .../secrets_plugins/messagebird.py            | 28 ++++++++++++++
 .../microsoft_teams_webhook.py                | 24 ++++++++++++
 .../secrets_plugins/netlify_access_token.py   | 24 ++++++++++++
 .../secrets_plugins/new_relic.py              | 32 ++++++++++++++++
 .../secrets_plugins/nytimes_access_token.py   | 23 +++++++++++
 .../secrets_plugins/okta_access_token.py      | 23 +++++++++++
 .../secrets_plugins/openai_api_key.py         | 19 ++++++++++
 .../secrets_plugins/planetscale.py            | 32 ++++++++++++++++
 .../secrets_plugins/postman_api_token.py      | 23 +++++++++++
 .../secrets_plugins/prefect_api_token.py      | 19 ++++++++++
 .../secrets_plugins/pulumi_api_token.py       | 19 ++++++++++
 .../secrets_plugins/pypi_upload_token.py      | 19 ++++++++++
 .../secrets_plugins/rapidapi_access_token.py  | 23 +++++++++++
 .../secrets_plugins/readme_api_token.py       | 21 ++++++++++
 .../secrets_plugins/rubygems_api_token.py     | 21 ++++++++++
 .../secrets_plugins/scalingo_api_token.py     | 19 ++++++++++
 .../secrets_plugins/sendbird.py               | 28 ++++++++++++++
 .../secrets_plugins/sendgrid_api_token.py     | 23 +++++++++++
 .../secrets_plugins/sendinblue_api_token.py   | 23 +++++++++++
 .../secrets_plugins/sentry_access_token.py    | 23 +++++++++++
 .../secrets_plugins/shippo_api_token.py       | 23 +++++++++++
 .../secrets_plugins/shopify.py                | 31 +++++++++++++++
 .../secrets_plugins/sidekiq.py                | 28 ++++++++++++++
 .../enterprise_hooks/secrets_plugins/slack.py | 38 +++++++++++++++++++
 .../secrets_plugins/snyk_api_token.py         | 23 +++++++++++
 .../squarespace_access_token.py               | 23 +++++++++++
 .../secrets_plugins/sumologic.py              | 22 +++++++++++
 .../secrets_plugins/telegram_bot_api_token.py | 23 +++++++++++
 .../secrets_plugins/travisci_access_token.py  | 23 +++++++++++
 .../secrets_plugins/twitch_api_token.py       | 23 +++++++++++
 .../secrets_plugins/twitter.py                | 36 ++++++++++++++++++
 .../secrets_plugins/typeform_api_token.py     | 23 +++++++++++
 .../enterprise_hooks/secrets_plugins/vault.py | 24 ++++++++++++
 .../secrets_plugins/yandex.py                 | 28 ++++++++++++++
 .../secrets_plugins/zendesk_secret_key.py     | 23 +++++++++++
 litellm/tests/test_secret_detect_hook.py      |  8 ++++
 96 files changed, 2337 insertions(+)
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/__init__.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/adafruit.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/adobe.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/age_secret_key.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/airtable_api_key.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/algolia_api_key.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/alibaba.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/asana.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/atlassian_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/authress_access_key.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/beamer_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/bitbucket.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/bittrex.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/clojars_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/codecov_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/coinbase_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/confluent.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/contentful_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/databricks_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/datadog_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/defined_networking_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/digitalocean.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/discord.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/doppler_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/droneci_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/dropbox.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/duffel_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/dynatrace_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/easypost.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/etsy_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/facebook_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/fastly_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/finicity.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/finnhub_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/flickr_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/flutterwave.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/frameio_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/freshbooks_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/gcp_api_key.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/github_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/gitlab.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/gitter_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/gocardless_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/grafana.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/hashicorp_tf_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/heroku_api_key.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/hubspot_api_key.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/huggingface.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/intercom_api_key.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/jfrog.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/jwt.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/kraken_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/kucoin.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/launchdarkly_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/linear.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/linkedin.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/lob.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/mailgun.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/mapbox_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/mattermost_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/messagebird.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/microsoft_teams_webhook.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/netlify_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/new_relic.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/nytimes_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/okta_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/openai_api_key.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/planetscale.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/postman_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/prefect_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/pulumi_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/pypi_upload_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/rapidapi_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/readme_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/rubygems_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/scalingo_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/sendbird.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/sendgrid_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/sendinblue_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/sentry_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/shippo_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/shopify.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/sidekiq.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/slack.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/snyk_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/squarespace_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/sumologic.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/telegram_bot_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/travisci_access_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/twitch_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/twitter.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/typeform_api_token.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/vault.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/yandex.py
 create mode 100644 enterprise/enterprise_hooks/secrets_plugins/zendesk_secret_key.py

diff --git a/enterprise/enterprise_hooks/secrets_plugins/__init__.py b/enterprise/enterprise_hooks/secrets_plugins/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/enterprise/enterprise_hooks/secrets_plugins/adafruit.py b/enterprise/enterprise_hooks/secrets_plugins/adafruit.py
new file mode 100644
index 000000000..abee3398f
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/adafruit.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Adafruit keys
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class AdafruitKeyDetector(RegexBasedDetector):
+    """Scans for Adafruit keys."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Adafruit API Key"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:adafruit)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9_-]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            )
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/adobe.py b/enterprise/enterprise_hooks/secrets_plugins/adobe.py
new file mode 100644
index 000000000..7a58ccdf9
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/adobe.py
@@ -0,0 +1,26 @@
+"""
+This plugin searches for Adobe keys
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class AdobeSecretDetector(RegexBasedDetector):
+    """Scans for Adobe client keys."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Adobe Client Keys"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Adobe Client ID (OAuth Web)
+            re.compile(
+                r"""(?i)(?:adobe)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-f0-9]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Adobe Client Secret
+            re.compile(r"(?i)\b((p8e-)[a-z0-9]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/age_secret_key.py b/enterprise/enterprise_hooks/secrets_plugins/age_secret_key.py
new file mode 100644
index 000000000..2c0c17910
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/age_secret_key.py
@@ -0,0 +1,21 @@
+"""
+This plugin searches for Age secret keys
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class AgeSecretKeyDetector(RegexBasedDetector):
+    """Scans for Age secret keys."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Age Secret Key"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(r"""AGE-SECRET-KEY-1[QPZRY9X8GF2TVDW0S3JN54KHCE6MUA7L]{58}"""),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/airtable_api_key.py b/enterprise/enterprise_hooks/secrets_plugins/airtable_api_key.py
new file mode 100644
index 000000000..8abf4f6e4
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/airtable_api_key.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Airtable API keys
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class AirtableApiKeyDetector(RegexBasedDetector):
+    """Scans for Airtable API keys."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Airtable API Key"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:airtable)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{17})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/algolia_api_key.py b/enterprise/enterprise_hooks/secrets_plugins/algolia_api_key.py
new file mode 100644
index 000000000..cd6c16a8c
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/algolia_api_key.py
@@ -0,0 +1,21 @@
+"""
+This plugin searches for Algolia API keys
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class AlgoliaApiKeyDetector(RegexBasedDetector):
+    """Scans for Algolia API keys."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Algolia API Key"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(r"""(?i)\b((LTAI)[a-z0-9]{20})(?:['|\"|\n|\r|\s|\x60|;]|$)"""),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/alibaba.py b/enterprise/enterprise_hooks/secrets_plugins/alibaba.py
new file mode 100644
index 000000000..5d071f1a9
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/alibaba.py
@@ -0,0 +1,26 @@
+"""
+This plugin searches for Alibaba secrets
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class AlibabaSecretDetector(RegexBasedDetector):
+    """Scans for Alibaba AccessKey IDs and Secret Keys."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Alibaba Secrets"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # For Alibaba AccessKey ID
+            re.compile(r"""(?i)\b((LTAI)[a-z0-9]{20})(?:['|\"|\n|\r|\s|\x60|;]|$)"""),
+            # For Alibaba Secret Key
+            re.compile(
+                r"""(?i)(?:alibaba)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{30})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/asana.py b/enterprise/enterprise_hooks/secrets_plugins/asana.py
new file mode 100644
index 000000000..fd96872c6
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/asana.py
@@ -0,0 +1,28 @@
+"""
+This plugin searches for Asana secrets
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class AsanaSecretDetector(RegexBasedDetector):
+    """Scans for Asana Client IDs and Client Secrets."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Asana Secrets"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # For Asana Client ID
+            re.compile(
+                r"""(?i)(?:asana)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([0-9]{16})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # For Asana Client Secret
+            re.compile(
+                r"""(?i)(?:asana)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/atlassian_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/atlassian_api_token.py
new file mode 100644
index 000000000..42fd291ff
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/atlassian_api_token.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Atlassian API tokens
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class AtlassianApiTokenDetector(RegexBasedDetector):
+    """Scans for Atlassian API tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Atlassian API token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # For Atlassian API token
+            re.compile(
+                r"""(?i)(?:atlassian|confluence|jira)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{24})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/authress_access_key.py b/enterprise/enterprise_hooks/secrets_plugins/authress_access_key.py
new file mode 100644
index 000000000..ff7466fc4
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/authress_access_key.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Authress Service Client Access Keys
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class AuthressAccessKeyDetector(RegexBasedDetector):
+    """Scans for Authress Service Client Access Keys."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Authress Service Client Access Key"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # For Authress Service Client Access Key
+            re.compile(
+                r"""(?i)\b((?:sc|ext|scauth|authress)_[a-z0-9]{5,30}\.[a-z0-9]{4,6}\.acc[_-][a-z0-9-]{10,32}\.[a-z0-9+/_=-]{30,120})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/beamer_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/beamer_api_token.py
new file mode 100644
index 000000000..5303e6262
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/beamer_api_token.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Beamer API tokens
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class BeamerApiTokenDetector(RegexBasedDetector):
+    """Scans for Beamer API tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Beamer API token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # For Beamer API token
+            re.compile(
+                r"""(?i)(?:beamer)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}(b_[a-z0-9=_\-]{44})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/bitbucket.py b/enterprise/enterprise_hooks/secrets_plugins/bitbucket.py
new file mode 100644
index 000000000..aae28dcc7
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/bitbucket.py
@@ -0,0 +1,28 @@
+"""
+This plugin searches for Bitbucket Client ID and Client Secret
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class BitbucketDetector(RegexBasedDetector):
+    """Scans for Bitbucket Client ID and Client Secret."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Bitbucket Secrets"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # For Bitbucket Client ID
+            re.compile(
+                r"""(?i)(?:bitbucket)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # For Bitbucket Client Secret
+            re.compile(
+                r"""(?i)(?:bitbucket)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9=_\-]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/bittrex.py b/enterprise/enterprise_hooks/secrets_plugins/bittrex.py
new file mode 100644
index 000000000..e8bd3347b
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/bittrex.py
@@ -0,0 +1,28 @@
+"""
+This plugin searches for Bittrex Access Key and Secret Key
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class BittrexDetector(RegexBasedDetector):
+    """Scans for Bittrex Access Key and Secret Key."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Bittrex Secrets"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # For Bittrex Access Key
+            re.compile(
+                r"""(?i)(?:bittrex)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # For Bittrex Secret Key
+            re.compile(
+                r"""(?i)(?:bittrex)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/clojars_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/clojars_api_token.py
new file mode 100644
index 000000000..6eb41ec4b
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/clojars_api_token.py
@@ -0,0 +1,22 @@
+"""
+This plugin searches for Clojars API tokens
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class ClojarsApiTokenDetector(RegexBasedDetector):
+    """Scans for Clojars API tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Clojars API token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # For Clojars API token
+            re.compile(r"(?i)(CLOJARS_)[a-z0-9]{60}"),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/codecov_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/codecov_access_token.py
new file mode 100644
index 000000000..51001675f
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/codecov_access_token.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Codecov Access Token
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class CodecovAccessTokenDetector(RegexBasedDetector):
+    """Scans for Codecov Access Token."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Codecov Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # For Codecov Access Token
+            re.compile(
+                r"""(?i)(?:codecov)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/coinbase_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/coinbase_access_token.py
new file mode 100644
index 000000000..0af631be9
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/coinbase_access_token.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Coinbase Access Token
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class CoinbaseAccessTokenDetector(RegexBasedDetector):
+    """Scans for Coinbase Access Token."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Coinbase Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # For Coinbase Access Token
+            re.compile(
+                r"""(?i)(?:coinbase)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9_-]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/confluent.py b/enterprise/enterprise_hooks/secrets_plugins/confluent.py
new file mode 100644
index 000000000..aefbd42b9
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/confluent.py
@@ -0,0 +1,28 @@
+"""
+This plugin searches for Confluent Access Token and Confluent Secret Key
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class ConfluentDetector(RegexBasedDetector):
+    """Scans for Confluent Access Token and Confluent Secret Key."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Confluent Secret"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # For Confluent Access Token
+            re.compile(
+                r"""(?i)(?:confluent)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{16})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # For Confluent Secret Key
+            re.compile(
+                r"""(?i)(?:confluent)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/contentful_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/contentful_api_token.py
new file mode 100644
index 000000000..33817dc4d
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/contentful_api_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Contentful delivery API token.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class ContentfulApiTokenDetector(RegexBasedDetector):
+    """Scans for Contentful delivery API token."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Contentful API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:contentful)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9=_\-]{43})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/databricks_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/databricks_api_token.py
new file mode 100644
index 000000000..9e47355b1
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/databricks_api_token.py
@@ -0,0 +1,21 @@
+"""
+This plugin searches for Databricks API token.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class DatabricksApiTokenDetector(RegexBasedDetector):
+    """Scans for Databricks API token."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Databricks API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(r"""(?i)\b(dapi[a-h0-9]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/datadog_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/datadog_access_token.py
new file mode 100644
index 000000000..bdb430d9b
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/datadog_access_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Datadog Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class DatadogAccessTokenDetector(RegexBasedDetector):
+    """Scans for Datadog Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Datadog Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:datadog)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{40})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/defined_networking_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/defined_networking_api_token.py
new file mode 100644
index 000000000..b23cdb454
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/defined_networking_api_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Defined Networking API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class DefinedNetworkingApiTokenDetector(RegexBasedDetector):
+    """Scans for Defined Networking API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Defined Networking API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:dnkey)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}(dnkey-[a-z0-9=_\-]{26}-[a-z0-9=_\-]{52})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/digitalocean.py b/enterprise/enterprise_hooks/secrets_plugins/digitalocean.py
new file mode 100644
index 000000000..5ffc4f600
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/digitalocean.py
@@ -0,0 +1,26 @@
+"""
+This plugin searches for DigitalOcean tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class DigitaloceanDetector(RegexBasedDetector):
+    """Scans for various DigitalOcean Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "DigitalOcean Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # OAuth Access Token
+            re.compile(r"""(?i)\b(doo_v1_[a-f0-9]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)"""),
+            # Personal Access Token
+            re.compile(r"""(?i)\b(dop_v1_[a-f0-9]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)"""),
+            # OAuth Refresh Token
+            re.compile(r"""(?i)\b(dor_v1_[a-f0-9]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)"""),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/discord.py b/enterprise/enterprise_hooks/secrets_plugins/discord.py
new file mode 100644
index 000000000..c51406b60
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/discord.py
@@ -0,0 +1,32 @@
+"""
+This plugin searches for Discord Client tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class DiscordDetector(RegexBasedDetector):
+    """Scans for various Discord Client Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Discord Client Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Discord API key
+            re.compile(
+                r"""(?i)(?:discord)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-f0-9]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Discord client ID
+            re.compile(
+                r"""(?i)(?:discord)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([0-9]{18})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Discord client secret
+            re.compile(
+                r"""(?i)(?:discord)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9=_\-]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/doppler_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/doppler_api_token.py
new file mode 100644
index 000000000..56c594fc1
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/doppler_api_token.py
@@ -0,0 +1,22 @@
+"""
+This plugin searches for Doppler API tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class DopplerApiTokenDetector(RegexBasedDetector):
+    """Scans for Doppler API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Doppler API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Doppler API token
+            re.compile(r"""(?i)dp\.pt\.[a-z0-9]{43}"""),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/droneci_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/droneci_access_token.py
new file mode 100644
index 000000000..8afffb802
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/droneci_access_token.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Droneci Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class DroneciAccessTokenDetector(RegexBasedDetector):
+    """Scans for Droneci Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Droneci Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Droneci Access Token
+            re.compile(
+                r"""(?i)(?:droneci)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/dropbox.py b/enterprise/enterprise_hooks/secrets_plugins/dropbox.py
new file mode 100644
index 000000000..b19815b26
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/dropbox.py
@@ -0,0 +1,32 @@
+"""
+This plugin searches for Dropbox tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class DropboxDetector(RegexBasedDetector):
+    """Scans for various Dropbox Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Dropbox Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Dropbox API secret
+            re.compile(
+                r"""(?i)(?:dropbox)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{15})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Dropbox long-lived API token
+            re.compile(
+                r"""(?i)(?:dropbox)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{11}(AAAAAAAAAA)[a-z0-9\-_=]{43})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Dropbox short-lived API token
+            re.compile(
+                r"""(?i)(?:dropbox)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}(sl\.[a-z0-9\-=_]{135})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/duffel_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/duffel_api_token.py
new file mode 100644
index 000000000..aab681598
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/duffel_api_token.py
@@ -0,0 +1,22 @@
+"""
+This plugin searches for Duffel API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class DuffelApiTokenDetector(RegexBasedDetector):
+    """Scans for Duffel API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Duffel API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Duffel API Token
+            re.compile(r"""(?i)duffel_(test|live)_[a-z0-9_\-=]{43}"""),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/dynatrace_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/dynatrace_api_token.py
new file mode 100644
index 000000000..caf7dd719
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/dynatrace_api_token.py
@@ -0,0 +1,22 @@
+"""
+This plugin searches for Dynatrace API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class DynatraceApiTokenDetector(RegexBasedDetector):
+    """Scans for Dynatrace API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Dynatrace API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Dynatrace API Token
+            re.compile(r"""(?i)dt0c01\.[a-z0-9]{24}\.[a-z0-9]{64}"""),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/easypost.py b/enterprise/enterprise_hooks/secrets_plugins/easypost.py
new file mode 100644
index 000000000..73d27cb49
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/easypost.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for EasyPost tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class EasyPostDetector(RegexBasedDetector):
+    """Scans for various EasyPost Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "EasyPost Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # EasyPost API token
+            re.compile(r"""(?i)\bEZAK[a-z0-9]{54}"""),
+            # EasyPost test API token
+            re.compile(r"""(?i)\bEZTK[a-z0-9]{54}"""),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/etsy_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/etsy_access_token.py
new file mode 100644
index 000000000..1775a4b41
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/etsy_access_token.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Etsy Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class EtsyAccessTokenDetector(RegexBasedDetector):
+    """Scans for Etsy Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Etsy Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Etsy Access Token
+            re.compile(
+                r"""(?i)(?:etsy)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{24})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/facebook_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/facebook_access_token.py
new file mode 100644
index 000000000..edc7d080c
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/facebook_access_token.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Facebook Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class FacebookAccessTokenDetector(RegexBasedDetector):
+    """Scans for Facebook Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Facebook Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Facebook Access Token
+            re.compile(
+                r"""(?i)(?:facebook)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-f0-9]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/fastly_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/fastly_api_token.py
new file mode 100644
index 000000000..4d451cb74
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/fastly_api_token.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Fastly API keys.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class FastlyApiKeyDetector(RegexBasedDetector):
+    """Scans for Fastly API keys."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Fastly API Key"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Fastly API key
+            re.compile(
+                r"""(?i)(?:fastly)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9=_\-]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/finicity.py b/enterprise/enterprise_hooks/secrets_plugins/finicity.py
new file mode 100644
index 000000000..97414352f
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/finicity.py
@@ -0,0 +1,28 @@
+"""
+This plugin searches for Finicity API tokens and Client Secrets.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class FinicityDetector(RegexBasedDetector):
+    """Scans for Finicity API tokens and Client Secrets."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Finicity Credentials"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Finicity API token
+            re.compile(
+                r"""(?i)(?:finicity)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-f0-9]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Finicity Client Secret
+            re.compile(
+                r"""(?i)(?:finicity)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{20})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/finnhub_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/finnhub_access_token.py
new file mode 100644
index 000000000..eeb09682b
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/finnhub_access_token.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Finnhub Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class FinnhubAccessTokenDetector(RegexBasedDetector):
+    """Scans for Finnhub Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Finnhub Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Finnhub Access Token
+            re.compile(
+                r"""(?i)(?:finnhub)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{20})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/flickr_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/flickr_access_token.py
new file mode 100644
index 000000000..530628547
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/flickr_access_token.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Flickr Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class FlickrAccessTokenDetector(RegexBasedDetector):
+    """Scans for Flickr Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Flickr Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Flickr Access Token
+            re.compile(
+                r"""(?i)(?:flickr)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/flutterwave.py b/enterprise/enterprise_hooks/secrets_plugins/flutterwave.py
new file mode 100644
index 000000000..fc46ba222
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/flutterwave.py
@@ -0,0 +1,26 @@
+"""
+This plugin searches for Flutterwave API keys.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class FlutterwaveDetector(RegexBasedDetector):
+    """Scans for Flutterwave API Keys."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Flutterwave API Key"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Flutterwave Encryption Key
+            re.compile(r"""(?i)FLWSECK_TEST-[a-h0-9]{12}"""),
+            # Flutterwave Public Key
+            re.compile(r"""(?i)FLWPUBK_TEST-[a-h0-9]{32}-X"""),
+            # Flutterwave Secret Key
+            re.compile(r"""(?i)FLWSECK_TEST-[a-h0-9]{32}-X"""),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/frameio_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/frameio_api_token.py
new file mode 100644
index 000000000..9524e873d
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/frameio_api_token.py
@@ -0,0 +1,22 @@
+"""
+This plugin searches for Frame.io API tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class FrameIoApiTokenDetector(RegexBasedDetector):
+    """Scans for Frame.io API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Frame.io API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Frame.io API token
+            re.compile(r"""(?i)fio-u-[a-z0-9\-_=]{64}"""),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/freshbooks_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/freshbooks_access_token.py
new file mode 100644
index 000000000..b6b16e2b8
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/freshbooks_access_token.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Freshbooks Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class FreshbooksAccessTokenDetector(RegexBasedDetector):
+    """Scans for Freshbooks Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Freshbooks Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Freshbooks Access Token
+            re.compile(
+                r"""(?i)(?:freshbooks)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/gcp_api_key.py b/enterprise/enterprise_hooks/secrets_plugins/gcp_api_key.py
new file mode 100644
index 000000000..6055cc262
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/gcp_api_key.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for GCP API keys.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class GCPApiKeyDetector(RegexBasedDetector):
+    """Scans for GCP API keys."""
+
+    @property
+    def secret_type(self) -> str:
+        return "GCP API Key"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # GCP API Key
+            re.compile(
+                r"""(?i)\b(AIza[0-9A-Za-z\\-_]{35})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/github_token.py b/enterprise/enterprise_hooks/secrets_plugins/github_token.py
new file mode 100644
index 000000000..acb5e3fc7
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/github_token.py
@@ -0,0 +1,26 @@
+"""
+This plugin searches for GitHub tokens
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class GitHubTokenCustomDetector(RegexBasedDetector):
+    """Scans for GitHub tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "GitHub Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # GitHub App/Personal Access/OAuth Access/Refresh Token
+            # ref. https://github.blog/2021-04-05-behind-githubs-new-authentication-token-formats/
+            re.compile(r"(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36}"),
+            # GitHub Fine-Grained Personal Access Token
+            re.compile(r"github_pat_[0-9a-zA-Z_]{82}"),
+            re.compile(r"gho_[0-9a-zA-Z]{36}"),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/gitlab.py b/enterprise/enterprise_hooks/secrets_plugins/gitlab.py
new file mode 100644
index 000000000..2277d8a2d
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/gitlab.py
@@ -0,0 +1,26 @@
+"""
+This plugin searches for GitLab secrets.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class GitLabDetector(RegexBasedDetector):
+    """Scans for GitLab Secrets."""
+
+    @property
+    def secret_type(self) -> str:
+        return "GitLab Secret"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # GitLab Personal Access Token
+            re.compile(r"""glpat-[0-9a-zA-Z\-\_]{20}"""),
+            # GitLab Pipeline Trigger Token
+            re.compile(r"""glptt-[0-9a-f]{40}"""),
+            # GitLab Runner Registration Token
+            re.compile(r"""GR1348941[0-9a-zA-Z\-\_]{20}"""),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/gitter_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/gitter_access_token.py
new file mode 100644
index 000000000..1febe70cb
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/gitter_access_token.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Gitter Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class GitterAccessTokenDetector(RegexBasedDetector):
+    """Scans for Gitter Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Gitter Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Gitter Access Token
+            re.compile(
+                r"""(?i)(?:gitter)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9_-]{40})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/gocardless_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/gocardless_api_token.py
new file mode 100644
index 000000000..240f6e4c5
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/gocardless_api_token.py
@@ -0,0 +1,25 @@
+"""
+This plugin searches for GoCardless API tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class GoCardlessApiTokenDetector(RegexBasedDetector):
+    """Scans for GoCardless API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "GoCardless API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # GoCardless API token
+            re.compile(
+                r"""(?:gocardless)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}(live_[a-z0-9\-_=]{40})(?:['|\"|\n|\r|\s|\x60|;]|$)""",
+                re.IGNORECASE,
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/grafana.py b/enterprise/enterprise_hooks/secrets_plugins/grafana.py
new file mode 100644
index 000000000..fd37f0f63
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/grafana.py
@@ -0,0 +1,32 @@
+"""
+This plugin searches for Grafana secrets.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class GrafanaDetector(RegexBasedDetector):
+    """Scans for Grafana Secrets."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Grafana Secret"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Grafana API key or Grafana Cloud API key
+            re.compile(
+                r"""(?i)\b(eyJrIjoi[A-Za-z0-9]{70,400}={0,2})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Grafana Cloud API token
+            re.compile(
+                r"""(?i)\b(glc_[A-Za-z0-9+/]{32,400}={0,2})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Grafana Service Account token
+            re.compile(
+                r"""(?i)\b(glsa_[A-Za-z0-9]{32}_[A-Fa-f0-9]{8})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/hashicorp_tf_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/hashicorp_tf_api_token.py
new file mode 100644
index 000000000..97013fd84
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/hashicorp_tf_api_token.py
@@ -0,0 +1,22 @@
+"""
+This plugin searches for HashiCorp Terraform user/org API tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class HashiCorpTFApiTokenDetector(RegexBasedDetector):
+    """Scans for HashiCorp Terraform User/Org API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "HashiCorp Terraform API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # HashiCorp Terraform user/org API token
+            re.compile(r"""(?i)[a-z0-9]{14}\.atlasv1\.[a-z0-9\-_=]{60,70}"""),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/heroku_api_key.py b/enterprise/enterprise_hooks/secrets_plugins/heroku_api_key.py
new file mode 100644
index 000000000..53be8aa48
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/heroku_api_key.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Heroku API Keys.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class HerokuApiKeyDetector(RegexBasedDetector):
+    """Scans for Heroku API Keys."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Heroku API Key"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:heroku)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/hubspot_api_key.py b/enterprise/enterprise_hooks/secrets_plugins/hubspot_api_key.py
new file mode 100644
index 000000000..230ef659b
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/hubspot_api_key.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for HubSpot API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class HubSpotApiTokenDetector(RegexBasedDetector):
+    """Scans for HubSpot API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "HubSpot API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # HubSpot API Token
+            re.compile(
+                r"""(?i)(?:hubspot)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/huggingface.py b/enterprise/enterprise_hooks/secrets_plugins/huggingface.py
new file mode 100644
index 000000000..be83a3a0d
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/huggingface.py
@@ -0,0 +1,26 @@
+"""
+This plugin searches for Hugging Face Access and Organization API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class HuggingFaceDetector(RegexBasedDetector):
+    """Scans for Hugging Face Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Hugging Face Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Hugging Face Access token
+            re.compile(r"""(?:^|[\\'"` >=:])(hf_[a-zA-Z]{34})(?:$|[\\'"` <])"""),
+            # Hugging Face Organization API token
+            re.compile(
+                r"""(?:^|[\\'"` >=:\(,)])(api_org_[a-zA-Z]{34})(?:$|[\\'"` <\),])"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/intercom_api_key.py b/enterprise/enterprise_hooks/secrets_plugins/intercom_api_key.py
new file mode 100644
index 000000000..24e16fc73
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/intercom_api_key.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Intercom API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class IntercomApiTokenDetector(RegexBasedDetector):
+    """Scans for Intercom API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Intercom API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:intercom)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9=_\-]{60})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/jfrog.py b/enterprise/enterprise_hooks/secrets_plugins/jfrog.py
new file mode 100644
index 000000000..3eabbfe3a
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/jfrog.py
@@ -0,0 +1,28 @@
+"""
+This plugin searches for JFrog-related secrets like API Key and Identity Token.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class JFrogDetector(RegexBasedDetector):
+    """Scans for JFrog-related secrets."""
+
+    @property
+    def secret_type(self) -> str:
+        return "JFrog Secrets"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # JFrog API Key
+            re.compile(
+                r"""(?i)(?:jfrog|artifactory|bintray|xray)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{73})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # JFrog Identity Token
+            re.compile(
+                r"""(?i)(?:jfrog|artifactory|bintray|xray)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/jwt.py b/enterprise/enterprise_hooks/secrets_plugins/jwt.py
new file mode 100644
index 000000000..6658a0950
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/jwt.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Base64-encoded JSON Web Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class JWTBase64Detector(RegexBasedDetector):
+    """Scans for Base64-encoded JSON Web Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Base64-encoded JSON Web Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Base64-encoded JSON Web Token
+            re.compile(
+                r"""\bZXlK(?:(?P<alg>aGJHY2lPaU)|(?P<apu>aGNIVWlPaU)|(?P<apv>aGNIWWlPaU)|(?P<aud>aGRXUWlPaU)|(?P<b64>aU5qUWlP)|(?P<crit>amNtbDBJanBi)|(?P<cty>amRIa2lPaU)|(?P<epk>bGNHc2lPbn)|(?P<enc>bGJtTWlPaU)|(?P<jku>cWEzVWlPaU)|(?P<jwk>cWQyc2lPb)|(?P<iss>cGMzTWlPaU)|(?P<iv>cGRpSTZJ)|(?P<kid>cmFXUWlP)|(?P<key_ops>clpYbGZiM0J6SWpwY)|(?P<kty>cmRIa2lPaUp)|(?P<nonce>dWIyNWpaU0k2)|(?P<p2c>d01tTWlP)|(?P<p2s>d01uTWlPaU)|(?P<ppt>d2NIUWlPaU)|(?P<sub>emRXSWlPaU)|(?P<svt>emRuUWlP)|(?P<tag>MFlXY2lPaU)|(?P<typ>MGVYQWlPaUp)|(?P<url>MWNtd2l)|(?P<use>MWMyVWlPaUp)|(?P<ver>MlpYSWlPaU)|(?P<version>MlpYSnphVzl1SWpv)|(?P<x>NElqb2)|(?P<x5c>NE5XTWlP)|(?P<x5t>NE5YUWlPaU)|(?P<x5ts256>NE5YUWpVekkxTmlJNkl)|(?P<x5u>NE5YVWlPaU)|(?P<zip>NmFYQWlPaU))[a-zA-Z0-9\/\\_+\-\r\n]{40,}={0,2}"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/kraken_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/kraken_access_token.py
new file mode 100644
index 000000000..cb7357cfd
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/kraken_access_token.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Kraken Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class KrakenAccessTokenDetector(RegexBasedDetector):
+    """Scans for Kraken Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Kraken Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Kraken Access Token
+            re.compile(
+                r"""(?i)(?:kraken)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9\/=_\+\-]{80,90})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/kucoin.py b/enterprise/enterprise_hooks/secrets_plugins/kucoin.py
new file mode 100644
index 000000000..02e990bd8
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/kucoin.py
@@ -0,0 +1,28 @@
+"""
+This plugin searches for Kucoin Access Tokens and Secret Keys.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class KucoinDetector(RegexBasedDetector):
+    """Scans for Kucoin Access Tokens and Secret Keys."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Kucoin Secret"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Kucoin Access Token
+            re.compile(
+                r"""(?i)(?:kucoin)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-f0-9]{24})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Kucoin Secret Key
+            re.compile(
+                r"""(?i)(?:kucoin)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/launchdarkly_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/launchdarkly_access_token.py
new file mode 100644
index 000000000..977990984
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/launchdarkly_access_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Launchdarkly Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class LaunchdarklyAccessTokenDetector(RegexBasedDetector):
+    """Scans for Launchdarkly Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Launchdarkly Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:launchdarkly)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9=_\-]{40})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            )
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/linear.py b/enterprise/enterprise_hooks/secrets_plugins/linear.py
new file mode 100644
index 000000000..1224b5ec4
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/linear.py
@@ -0,0 +1,26 @@
+"""
+This plugin searches for Linear API Tokens and Linear Client Secrets.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class LinearDetector(RegexBasedDetector):
+    """Scans for Linear secrets."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Linear Secret"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Linear API Token
+            re.compile(r"""(?i)lin_api_[a-z0-9]{40}"""),
+            # Linear Client Secret
+            re.compile(
+                r"""(?i)(?:linear)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-f0-9]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/linkedin.py b/enterprise/enterprise_hooks/secrets_plugins/linkedin.py
new file mode 100644
index 000000000..53ff0c30a
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/linkedin.py
@@ -0,0 +1,28 @@
+"""
+This plugin searches for LinkedIn Client IDs and LinkedIn Client secrets.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class LinkedInDetector(RegexBasedDetector):
+    """Scans for LinkedIn secrets."""
+
+    @property
+    def secret_type(self) -> str:
+        return "LinkedIn Secret"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # LinkedIn Client ID
+            re.compile(
+                r"""(?i)(?:linkedin|linked-in)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{14})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # LinkedIn Client secret
+            re.compile(
+                r"""(?i)(?:linkedin|linked-in)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{16})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/lob.py b/enterprise/enterprise_hooks/secrets_plugins/lob.py
new file mode 100644
index 000000000..623ac4f1f
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/lob.py
@@ -0,0 +1,28 @@
+"""
+This plugin searches for Lob API secrets and Lob Publishable API keys.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class LobDetector(RegexBasedDetector):
+    """Scans for Lob secrets."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Lob Secret"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Lob API Key
+            re.compile(
+                r"""(?i)(?:lob)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}((live|test)_[a-f0-9]{35})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Lob Publishable API Key
+            re.compile(
+                r"""(?i)(?:lob)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}((test|live)_pub_[a-f0-9]{31})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/mailgun.py b/enterprise/enterprise_hooks/secrets_plugins/mailgun.py
new file mode 100644
index 000000000..c403d2454
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/mailgun.py
@@ -0,0 +1,32 @@
+"""
+This plugin searches for Mailgun API secrets, public validation keys, and webhook signing keys.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class MailgunDetector(RegexBasedDetector):
+    """Scans for Mailgun secrets."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Mailgun Secret"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Mailgun Private API Token
+            re.compile(
+                r"""(?i)(?:mailgun)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}(key-[a-f0-9]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Mailgun Public Validation Key
+            re.compile(
+                r"""(?i)(?:mailgun)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}(pubkey-[a-f0-9]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Mailgun Webhook Signing Key
+            re.compile(
+                r"""(?i)(?:mailgun)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-h0-9]{32}-[a-h0-9]{8}-[a-h0-9]{8})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/mapbox_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/mapbox_api_token.py
new file mode 100644
index 000000000..0326b7102
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/mapbox_api_token.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for MapBox API tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class MapBoxApiTokenDetector(RegexBasedDetector):
+    """Scans for MapBox API tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "MapBox API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # MapBox API Token
+            re.compile(
+                r"""(?i)(?:mapbox)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}(pk\.[a-z0-9]{60}\.[a-z0-9]{22})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/mattermost_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/mattermost_access_token.py
new file mode 100644
index 000000000..d65b0e755
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/mattermost_access_token.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Mattermost Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class MattermostAccessTokenDetector(RegexBasedDetector):
+    """Scans for Mattermost Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Mattermost Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Mattermost Access Token
+            re.compile(
+                r"""(?i)(?:mattermost)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{26})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/messagebird.py b/enterprise/enterprise_hooks/secrets_plugins/messagebird.py
new file mode 100644
index 000000000..6adc8317a
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/messagebird.py
@@ -0,0 +1,28 @@
+"""
+This plugin searches for MessageBird API tokens and client IDs.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class MessageBirdDetector(RegexBasedDetector):
+    """Scans for MessageBird secrets."""
+
+    @property
+    def secret_type(self) -> str:
+        return "MessageBird Secret"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # MessageBird API Token
+            re.compile(
+                r"""(?i)(?:messagebird|message-bird|message_bird)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{25})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # MessageBird Client ID
+            re.compile(
+                r"""(?i)(?:messagebird|message-bird|message_bird)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/microsoft_teams_webhook.py b/enterprise/enterprise_hooks/secrets_plugins/microsoft_teams_webhook.py
new file mode 100644
index 000000000..298fd81b0
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/microsoft_teams_webhook.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Microsoft Teams Webhook URLs.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class MicrosoftTeamsWebhookDetector(RegexBasedDetector):
+    """Scans for Microsoft Teams Webhook URLs."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Microsoft Teams Webhook"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Microsoft Teams Webhook
+            re.compile(
+                r"""https:\/\/[a-z0-9]+\.webhook\.office\.com\/webhookb2\/[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}@[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}\/IncomingWebhook\/[a-z0-9]{32}\/[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/netlify_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/netlify_access_token.py
new file mode 100644
index 000000000..cc7a575a4
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/netlify_access_token.py
@@ -0,0 +1,24 @@
+"""
+This plugin searches for Netlify Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class NetlifyAccessTokenDetector(RegexBasedDetector):
+    """Scans for Netlify Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Netlify Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Netlify Access Token
+            re.compile(
+                r"""(?i)(?:netlify)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9=_\-]{40,46})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/new_relic.py b/enterprise/enterprise_hooks/secrets_plugins/new_relic.py
new file mode 100644
index 000000000..cef640155
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/new_relic.py
@@ -0,0 +1,32 @@
+"""
+This plugin searches for New Relic API tokens and keys.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class NewRelicDetector(RegexBasedDetector):
+    """Scans for New Relic API tokens and keys."""
+
+    @property
+    def secret_type(self) -> str:
+        return "New Relic API Secrets"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # New Relic ingest browser API token
+            re.compile(
+                r"""(?i)(?:new-relic|newrelic|new_relic)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}(NRJS-[a-f0-9]{19})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # New Relic user API ID
+            re.compile(
+                r"""(?i)(?:new-relic|newrelic|new_relic)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # New Relic user API Key
+            re.compile(
+                r"""(?i)(?:new-relic|newrelic|new_relic)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}(NRAK-[a-z0-9]{27})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/nytimes_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/nytimes_access_token.py
new file mode 100644
index 000000000..567b885e5
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/nytimes_access_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for New York Times Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class NYTimesAccessTokenDetector(RegexBasedDetector):
+    """Scans for New York Times Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "New York Times Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:nytimes|new-york-times,|newyorktimes)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9=_\-]{32})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            )
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/okta_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/okta_access_token.py
new file mode 100644
index 000000000..97109767b
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/okta_access_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Okta Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class OktaAccessTokenDetector(RegexBasedDetector):
+    """Scans for Okta Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Okta Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:okta)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9=_\-]{42})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            )
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/openai_api_key.py b/enterprise/enterprise_hooks/secrets_plugins/openai_api_key.py
new file mode 100644
index 000000000..c5d20f759
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/openai_api_key.py
@@ -0,0 +1,19 @@
+"""
+This plugin searches for OpenAI API Keys.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class OpenAIApiKeyDetector(RegexBasedDetector):
+    """Scans for OpenAI API Keys."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Strict OpenAI API Key"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [re.compile(r"""(sk-[a-zA-Z0-9]{5,})""")]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/planetscale.py b/enterprise/enterprise_hooks/secrets_plugins/planetscale.py
new file mode 100644
index 000000000..23a53667e
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/planetscale.py
@@ -0,0 +1,32 @@
+"""
+This plugin searches for PlanetScale API tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class PlanetScaleDetector(RegexBasedDetector):
+    """Scans for PlanetScale API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "PlanetScale API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # the PlanetScale API token
+            re.compile(
+                r"""(?i)\b(pscale_tkn_[a-z0-9=\-_\.]{32,64})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # the PlanetScale OAuth token
+            re.compile(
+                r"""(?i)\b(pscale_oauth_[a-z0-9=\-_\.]{32,64})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # the PlanetScale password
+            re.compile(
+                r"""(?i)\b(pscale_pw_[a-z0-9=\-_\.]{32,64})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/postman_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/postman_api_token.py
new file mode 100644
index 000000000..9469e8191
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/postman_api_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Postman API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class PostmanApiTokenDetector(RegexBasedDetector):
+    """Scans for Postman API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Postman API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)\b(PMAK-[a-f0-9]{24}-[a-f0-9]{34})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            )
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/prefect_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/prefect_api_token.py
new file mode 100644
index 000000000..35cdb71ca
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/prefect_api_token.py
@@ -0,0 +1,19 @@
+"""
+This plugin searches for Prefect API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class PrefectApiTokenDetector(RegexBasedDetector):
+    """Scans for Prefect API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Prefect API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [re.compile(r"""(?i)\b(pnu_[a-z0-9]{36})(?:['|\"|\n|\r|\s|\x60|;]|$)""")]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/pulumi_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/pulumi_api_token.py
new file mode 100644
index 000000000..bae4ce211
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/pulumi_api_token.py
@@ -0,0 +1,19 @@
+"""
+This plugin searches for Pulumi API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class PulumiApiTokenDetector(RegexBasedDetector):
+    """Scans for Pulumi API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Pulumi API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [re.compile(r"""(?i)\b(pul-[a-f0-9]{40})(?:['|\"|\n|\r|\s|\x60|;]|$)""")]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/pypi_upload_token.py b/enterprise/enterprise_hooks/secrets_plugins/pypi_upload_token.py
new file mode 100644
index 000000000..d4cc91385
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/pypi_upload_token.py
@@ -0,0 +1,19 @@
+"""
+This plugin searches for PyPI Upload Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class PyPiUploadTokenDetector(RegexBasedDetector):
+    """Scans for PyPI Upload Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "PyPI Upload Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [re.compile(r"""pypi-AgEIcHlwaS5vcmc[A-Za-z0-9\-_]{50,1000}""")]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/rapidapi_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/rapidapi_access_token.py
new file mode 100644
index 000000000..18b234614
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/rapidapi_access_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for RapidAPI Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class RapidApiAccessTokenDetector(RegexBasedDetector):
+    """Scans for RapidAPI Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "RapidAPI Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:rapidapi)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9_-]{50})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            )
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/readme_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/readme_api_token.py
new file mode 100644
index 000000000..47bdffb12
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/readme_api_token.py
@@ -0,0 +1,21 @@
+"""
+This plugin searches for Readme API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class ReadmeApiTokenDetector(RegexBasedDetector):
+    """Scans for Readme API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Readme API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(r"""(?i)\b(rdme_[a-z0-9]{70})(?:['|\"|\n|\r|\s|\x60|;]|$)""")
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/rubygems_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/rubygems_api_token.py
new file mode 100644
index 000000000..d49c58e73
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/rubygems_api_token.py
@@ -0,0 +1,21 @@
+"""
+This plugin searches for Rubygem API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class RubygemsApiTokenDetector(RegexBasedDetector):
+    """Scans for Rubygem API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Rubygem API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(r"""(?i)\b(rubygems_[a-f0-9]{48})(?:['|\"|\n|\r|\s|\x60|;]|$)""")
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/scalingo_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/scalingo_api_token.py
new file mode 100644
index 000000000..3f8a59ee4
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/scalingo_api_token.py
@@ -0,0 +1,19 @@
+"""
+This plugin searches for Scalingo API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class ScalingoApiTokenDetector(RegexBasedDetector):
+    """Scans for Scalingo API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Scalingo API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [re.compile(r"""\btk-us-[a-zA-Z0-9-_]{48}\b""")]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/sendbird.py b/enterprise/enterprise_hooks/secrets_plugins/sendbird.py
new file mode 100644
index 000000000..4b270d71e
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/sendbird.py
@@ -0,0 +1,28 @@
+"""
+This plugin searches for Sendbird Access IDs and Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class SendbirdDetector(RegexBasedDetector):
+    """Scans for Sendbird Access IDs and Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Sendbird Credential"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Sendbird Access ID
+            re.compile(
+                r"""(?i)(?:sendbird)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Sendbird Access Token
+            re.compile(
+                r"""(?i)(?:sendbird)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-f0-9]{40})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/sendgrid_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/sendgrid_api_token.py
new file mode 100644
index 000000000..bf974f4fd
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/sendgrid_api_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for SendGrid API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class SendGridApiTokenDetector(RegexBasedDetector):
+    """Scans for SendGrid API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "SendGrid API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)\b(SG\.[a-z0-9=_\-\.]{66})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            )
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/sendinblue_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/sendinblue_api_token.py
new file mode 100644
index 000000000..a6ed8c15e
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/sendinblue_api_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for SendinBlue API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class SendinBlueApiTokenDetector(RegexBasedDetector):
+    """Scans for SendinBlue API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "SendinBlue API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)\b(xkeysib-[a-f0-9]{64}-[a-z0-9]{16})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            )
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/sentry_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/sentry_access_token.py
new file mode 100644
index 000000000..181fad2c7
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/sentry_access_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Sentry Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class SentryAccessTokenDetector(RegexBasedDetector):
+    """Scans for Sentry Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Sentry Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:sentry)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-f0-9]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            )
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/shippo_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/shippo_api_token.py
new file mode 100644
index 000000000..4314c6876
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/shippo_api_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Shippo API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class ShippoApiTokenDetector(RegexBasedDetector):
+    """Scans for Shippo API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Shippo API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)\b(shippo_(live|test)_[a-f0-9]{40})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            )
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/shopify.py b/enterprise/enterprise_hooks/secrets_plugins/shopify.py
new file mode 100644
index 000000000..f5f97c447
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/shopify.py
@@ -0,0 +1,31 @@
+"""
+This plugin searches for Shopify Access Tokens, Custom Access Tokens,
+Private App Access Tokens, and Shared Secrets.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class ShopifyDetector(RegexBasedDetector):
+    """Scans for Shopify Access Tokens, Custom Access Tokens, Private App Access Tokens,
+    and Shared Secrets.
+    """
+
+    @property
+    def secret_type(self) -> str:
+        return "Shopify Secret"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Shopify access token
+            re.compile(r"""shpat_[a-fA-F0-9]{32}"""),
+            # Shopify custom access token
+            re.compile(r"""shpca_[a-fA-F0-9]{32}"""),
+            # Shopify private app access token
+            re.compile(r"""shppa_[a-fA-F0-9]{32}"""),
+            # Shopify shared secret
+            re.compile(r"""shpss_[a-fA-F0-9]{32}"""),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/sidekiq.py b/enterprise/enterprise_hooks/secrets_plugins/sidekiq.py
new file mode 100644
index 000000000..431ce7b8e
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/sidekiq.py
@@ -0,0 +1,28 @@
+"""
+This plugin searches for Sidekiq secrets and sensitive URLs.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class SidekiqDetector(RegexBasedDetector):
+    """Scans for Sidekiq secrets and sensitive URLs."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Sidekiq Secret"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Sidekiq Secret
+            re.compile(
+                r"""(?i)(?:BUNDLE_ENTERPRISE__CONTRIBSYS__COM|BUNDLE_GEMS__CONTRIBSYS__COM)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-f0-9]{8}:[a-f0-9]{8})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Sidekiq Sensitive URL
+            re.compile(
+                r"""(?i)\b(http(?:s??):\/\/)([a-f0-9]{8}:[a-f0-9]{8})@(?:gems.contribsys.com|enterprise.contribsys.com)(?:[\/|\#|\?|:]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/slack.py b/enterprise/enterprise_hooks/secrets_plugins/slack.py
new file mode 100644
index 000000000..4896fd76b
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/slack.py
@@ -0,0 +1,38 @@
+"""
+This plugin searches for Slack tokens and webhooks.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class SlackDetector(RegexBasedDetector):
+    """Scans for Slack tokens and webhooks."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Slack Secret"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Slack App-level token
+            re.compile(r"""(?i)(xapp-\d-[A-Z0-9]+-\d+-[a-z0-9]+)"""),
+            # Slack Bot token
+            re.compile(r"""(xoxb-[0-9]{10,13}\-[0-9]{10,13}[a-zA-Z0-9-]*)"""),
+            # Slack Configuration access token and refresh token
+            re.compile(r"""(?i)(xoxe.xox[bp]-\d-[A-Z0-9]{163,166})"""),
+            re.compile(r"""(?i)(xoxe-\d-[A-Z0-9]{146})"""),
+            # Slack Legacy bot token and token
+            re.compile(r"""(xoxb-[0-9]{8,14}\-[a-zA-Z0-9]{18,26})"""),
+            re.compile(r"""(xox[os]-\d+-\d+-\d+-[a-fA-F\d]+)"""),
+            # Slack Legacy Workspace token
+            re.compile(r"""(xox[ar]-(?:\d-)?[0-9a-zA-Z]{8,48})"""),
+            # Slack User token and enterprise token
+            re.compile(r"""(xox[pe](?:-[0-9]{10,13}){3}-[a-zA-Z0-9-]{28,34})"""),
+            # Slack Webhook URL
+            re.compile(
+                r"""(https?:\/\/)?hooks.slack.com\/(services|workflows)\/[A-Za-z0-9+\/]{43,46}"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/snyk_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/snyk_api_token.py
new file mode 100644
index 000000000..839bb5731
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/snyk_api_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Snyk API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class SnykApiTokenDetector(RegexBasedDetector):
+    """Scans for Snyk API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Snyk API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:snyk)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            )
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/squarespace_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/squarespace_access_token.py
new file mode 100644
index 000000000..0dc83ad91
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/squarespace_access_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Squarespace Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class SquarespaceAccessTokenDetector(RegexBasedDetector):
+    """Scans for Squarespace Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Squarespace Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:squarespace)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            )
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/sumologic.py b/enterprise/enterprise_hooks/secrets_plugins/sumologic.py
new file mode 100644
index 000000000..7117629ac
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/sumologic.py
@@ -0,0 +1,22 @@
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class SumoLogicDetector(RegexBasedDetector):
+    """Scans for SumoLogic Access ID and Access Token."""
+
+    @property
+    def secret_type(self) -> str:
+        return "SumoLogic"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i:(?:sumo)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3})(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}(su[a-zA-Z0-9]{12})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            re.compile(
+                r"""(?i)(?:sumo)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{64})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/telegram_bot_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/telegram_bot_api_token.py
new file mode 100644
index 000000000..30854fda1
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/telegram_bot_api_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Telegram Bot API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class TelegramBotApiTokenDetector(RegexBasedDetector):
+    """Scans for Telegram Bot API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Telegram Bot API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:^|[^0-9])([0-9]{5,16}:A[a-zA-Z0-9_\-]{34})(?:$|[^a-zA-Z0-9_\-])"""
+            )
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/travisci_access_token.py b/enterprise/enterprise_hooks/secrets_plugins/travisci_access_token.py
new file mode 100644
index 000000000..90f9b48f4
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/travisci_access_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Travis CI Access Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class TravisCiAccessTokenDetector(RegexBasedDetector):
+    """Scans for Travis CI Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Travis CI Access Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:travis)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{22})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            )
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/twitch_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/twitch_api_token.py
new file mode 100644
index 000000000..1e0e3ccf8
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/twitch_api_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Twitch API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class TwitchApiTokenDetector(RegexBasedDetector):
+    """Scans for Twitch API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Twitch API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:twitch)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{30})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            )
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/twitter.py b/enterprise/enterprise_hooks/secrets_plugins/twitter.py
new file mode 100644
index 000000000..99ad170d1
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/twitter.py
@@ -0,0 +1,36 @@
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class TwitterDetector(RegexBasedDetector):
+    """Scans for Twitter Access Secrets, Access Tokens, API Keys, API Secrets, and Bearer Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Twitter Secret"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Twitter Access Secret
+            re.compile(
+                r"""(?i)(?:twitter)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{45})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Twitter Access Token
+            re.compile(
+                r"""(?i)(?:twitter)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([0-9]{15,25}-[a-zA-Z0-9]{20,40})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Twitter API Key
+            re.compile(
+                r"""(?i)(?:twitter)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{25})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Twitter API Secret
+            re.compile(
+                r"""(?i)(?:twitter)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{50})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Twitter Bearer Token
+            re.compile(
+                r"""(?i)(?:twitter)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}(A{22}[a-zA-Z0-9%]{80,100})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/typeform_api_token.py b/enterprise/enterprise_hooks/secrets_plugins/typeform_api_token.py
new file mode 100644
index 000000000..8d9dc0e87
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/typeform_api_token.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Typeform API Tokens.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class TypeformApiTokenDetector(RegexBasedDetector):
+    """Scans for Typeform API Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Typeform API Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:typeform)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}(tfp_[a-z0-9\-_\.=]{59})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            )
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/vault.py b/enterprise/enterprise_hooks/secrets_plugins/vault.py
new file mode 100644
index 000000000..5ca552cd9
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/vault.py
@@ -0,0 +1,24 @@
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class VaultDetector(RegexBasedDetector):
+    """Scans for Vault Batch Tokens and Vault Service Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Vault Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Vault Batch Token
+            re.compile(
+                r"""(?i)\b(hvb\.[a-z0-9_-]{138,212})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Vault Service Token
+            re.compile(
+                r"""(?i)\b(hvs\.[a-z0-9_-]{90,100})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/yandex.py b/enterprise/enterprise_hooks/secrets_plugins/yandex.py
new file mode 100644
index 000000000..a58faec0d
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/yandex.py
@@ -0,0 +1,28 @@
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class YandexDetector(RegexBasedDetector):
+    """Scans for Yandex Access Tokens, API Keys, and AWS Access Tokens."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Yandex Token"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            # Yandex Access Token
+            re.compile(
+                r"""(?i)(?:yandex)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}(t1\.[A-Z0-9a-z_-]+[=]{0,2}\.[A-Z0-9a-z_-]{86}[=]{0,2})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Yandex API Key
+            re.compile(
+                r"""(?i)(?:yandex)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}(AQVN[A-Za-z0-9_\-]{35,38})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+            # Yandex AWS Access Token
+            re.compile(
+                r"""(?i)(?:yandex)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}(YC[a-zA-Z0-9_\-]{38})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            ),
+        ]
diff --git a/enterprise/enterprise_hooks/secrets_plugins/zendesk_secret_key.py b/enterprise/enterprise_hooks/secrets_plugins/zendesk_secret_key.py
new file mode 100644
index 000000000..42c087c5b
--- /dev/null
+++ b/enterprise/enterprise_hooks/secrets_plugins/zendesk_secret_key.py
@@ -0,0 +1,23 @@
+"""
+This plugin searches for Zendesk Secret Keys.
+"""
+
+import re
+
+from detect_secrets.plugins.base import RegexBasedDetector
+
+
+class ZendeskSecretKeyDetector(RegexBasedDetector):
+    """Scans for Zendesk Secret Keys."""
+
+    @property
+    def secret_type(self) -> str:
+        return "Zendesk Secret Key"
+
+    @property
+    def denylist(self) -> list[re.Pattern]:
+        return [
+            re.compile(
+                r"""(?i)(?:zendesk)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{40})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
+            )
+        ]
diff --git a/litellm/tests/test_secret_detect_hook.py b/litellm/tests/test_secret_detect_hook.py
index cb1e01810..2c2007164 100644
--- a/litellm/tests/test_secret_detect_hook.py
+++ b/litellm/tests/test_secret_detect_hook.py
@@ -69,6 +69,10 @@ async def test_basic_secret_detection_chat():
                 "role": "user",
                 "content": "this is my OPENAI_API_KEY = 'sk_1234567890abcdef'",
             },
+            {
+                "role": "user",
+                "content": "My hi API Key is sk-Pc4nlxVoMz41290028TbMCxx, does it seem to be in the correct format?",
+            },
             {"role": "user", "content": "i think it is +1 412-555-5555"},
         ],
         "model": "gpt-3.5-turbo",
@@ -93,6 +97,10 @@ async def test_basic_secret_detection_chat():
                 "content": "Hello! I'm doing well. How can I assist you today?",
             },
             {"role": "user", "content": "this is my OPENAI_API_KEY = '[REDACTED]'"},
+            {
+                "role": "user",
+                "content": "My hi API Key is [REDACTED], does it seem to be in the correct format?",
+            },
             {"role": "user", "content": "i think it is +1 412-555-5555"},
         ],
         "model": "gpt-3.5-turbo",

From 3faf668ac2f0af22eedcb424f5cdf51270adc0ce Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Thu, 27 Jun 2024 15:20:30 -0700
Subject: [PATCH 146/150] fix secret scanner

---
 .../secrets_plugins/sidekiq.py                | 28 -------------------
 1 file changed, 28 deletions(-)
 delete mode 100644 enterprise/enterprise_hooks/secrets_plugins/sidekiq.py

diff --git a/enterprise/enterprise_hooks/secrets_plugins/sidekiq.py b/enterprise/enterprise_hooks/secrets_plugins/sidekiq.py
deleted file mode 100644
index 431ce7b8e..000000000
--- a/enterprise/enterprise_hooks/secrets_plugins/sidekiq.py
+++ /dev/null
@@ -1,28 +0,0 @@
-"""
-This plugin searches for Sidekiq secrets and sensitive URLs.
-"""
-
-import re
-
-from detect_secrets.plugins.base import RegexBasedDetector
-
-
-class SidekiqDetector(RegexBasedDetector):
-    """Scans for Sidekiq secrets and sensitive URLs."""
-
-    @property
-    def secret_type(self) -> str:
-        return "Sidekiq Secret"
-
-    @property
-    def denylist(self) -> list[re.Pattern]:
-        return [
-            # Sidekiq Secret
-            re.compile(
-                r"""(?i)(?:BUNDLE_ENTERPRISE__CONTRIBSYS__COM|BUNDLE_GEMS__CONTRIBSYS__COM)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-f0-9]{8}:[a-f0-9]{8})(?:['|\"|\n|\r|\s|\x60|;]|$)"""
-            ),
-            # Sidekiq Sensitive URL
-            re.compile(
-                r"""(?i)\b(http(?:s??):\/\/)([a-f0-9]{8}:[a-f0-9]{8})@(?:gems.contribsys.com|enterprise.contribsys.com)(?:[\/|\#|\?|:]|$)"""
-            ),
-        ]

From 4cffcb5f2814f34660d824dc122d0b2c38d8a478 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Thu, 27 Jun 2024 16:29:11 -0700
Subject: [PATCH 147/150] fix error message on v2/model info

---
 litellm/proxy/proxy_server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index b9972a723..710b3d11d 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -6284,7 +6284,7 @@ async def model_info_v2(
         raise HTTPException(
             status_code=500,
             detail={
-                "error": f"Invalid llm model list. llm_model_list={llm_model_list}"
+                "error": f"No model list passed, models={llm_model_list}. You can add a model through the config.yaml or on the LiteLLM Admin UI."
             },
         )
 

From e82b321044269816f1728284a0b96e1a2acbb1d2 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Thu, 27 Jun 2024 17:42:44 -0700
Subject: [PATCH 148/150] test fix secret detection

---
 enterprise/enterprise_hooks/secret_detection.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/enterprise/enterprise_hooks/secret_detection.py b/enterprise/enterprise_hooks/secret_detection.py
index 23dd2a7e0..d2bd22a5d 100644
--- a/enterprise/enterprise_hooks/secret_detection.py
+++ b/enterprise/enterprise_hooks/secret_detection.py
@@ -379,10 +379,6 @@ _default_detect_secrets_config = {
             "name": "ShopifyDetector",
             "path": _custom_plugins_path + "/shopify.py",
         },
-        {
-            "name": "SidekiqDetector",
-            "path": _custom_plugins_path + "/sidekiq.py",
-        },
         {
             "name": "SlackDetector",
             "path": _custom_plugins_path + "/slack.py",

From e776ac8ffc011d6537aca200703f95f8e01cf55f Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Thu, 27 Jun 2024 20:25:09 -0700
Subject: [PATCH 149/150] ci/cd run again

---
 litellm/tests/test_completion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 5138e9b61..1c10ef461 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -23,7 +23,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.prompt_templates.factory import anthropic_messages_pt
 
-# litellm.num_retries = 3
+# litellm.num_retries=3
 litellm.cache = None
 litellm.success_callback = []
 user_message = "Write a short poem about the sky"

From 511dd18e4beb6289e65a3d8876d95f758fd24e27 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Thu, 27 Jun 2024 20:58:29 -0700
Subject: [PATCH 150/150] remove debug print statement

---
 litellm/caching.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/litellm/caching.py b/litellm/caching.py
index 19c1431a2..64488289a 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -97,19 +97,13 @@ class InMemoryCache(BaseCache):
         """
         for key in list(self.ttl_dict.keys()):
             if time.time() > self.ttl_dict[key]:
-                print(  # noqa
-                    "Cache Evicting item key=",
-                    key,
-                    "ttl=",
-                    self.ttl_dict[key],
-                    "size of cache=",
-                    len(self.cache_dict),
-                )
                 self.cache_dict.pop(key, None)
                 self.ttl_dict.pop(key, None)
 
     def set_cache(self, key, value, **kwargs):
-        print_verbose("InMemoryCache: set_cache")
+        print_verbose(
+            "InMemoryCache: set_cache. current size= {}".format(len(self.cache_dict))
+        )
         if len(self.cache_dict) >= self.max_size_in_memory:
             # only evict when cache is full
             self.evict_cache()