Merge branch 'main' into litellm_bedrock_converse_api

This commit is contained in:
Krish Dholakia 2024-06-07 08:49:52 -07:00 committed by GitHub
commit 26993c067e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
82 changed files with 2540 additions and 1147 deletions

View file

@ -344,4 +344,4 @@ workflows:
filters:
branches:
only:
- main
- main

View file

@ -225,37 +225,37 @@ curl 'http://0.0.0.0:4000/key/generate' \
## Supported Providers ([Docs](https://docs.litellm.ai/docs/providers))
| Provider | [Completion](https://docs.litellm.ai/docs/#basic-usage) | [Streaming](https://docs.litellm.ai/docs/completion/stream#streaming-responses) | [Async Completion](https://docs.litellm.ai/docs/completion/stream#async-completion) | [Async Streaming](https://docs.litellm.ai/docs/completion/stream#async-streaming) | [Async Embedding](https://docs.litellm.ai/docs/embedding/supported_embedding) | [Async Image Generation](https://docs.litellm.ai/docs/image_generation) |
| ----------------------------------------------------------------------------------- | ------------------------------------------------------- | ------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------- |
| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅
| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ |
| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | ✅ | |
| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ |
| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ |
| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ |
| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ |
| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ |
| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ |
| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ |
| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ |
| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ |
| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ |
| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ |
| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ |
| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ |
| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ |
| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ |
| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ |
| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅
| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ |
| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ |
|-------------------------------------------------------------------------------------|---------------------------------------------------------|---------------------------------------------------------------------------------|-------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------|-------------------------------------------------------------------------------|-------------------------------------------------------------------------|
| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ | | |
| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | | | |
| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ | | |
| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ | | |
| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ | | |
| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ | | |
| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ | | |
| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ | | |
| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ | | |
| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ | | |
| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ | | |
| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ | | |
| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ | | |
| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | | |
| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | | |
| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | | |
| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | | |
| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | | |
| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | |
| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | |
[**Read the Docs**](https://docs.litellm.ai/docs/)

View file

@ -10,6 +10,7 @@ For companies that need SSO, user management and professional support for LiteLL
This covers:
- ✅ **Features under the [LiteLLM Commercial License (Content Mod, Custom Tags, etc.)](https://docs.litellm.ai/docs/proxy/enterprise)**
- ✅ [**Secure UI access with Single Sign-On**](../docs/proxy/ui.md#setup-ssoauth-for-ui)
- ✅ [**Audit Logs with retention policy**](../docs/proxy/enterprise.md#audit-logs)
- ✅ [**JWT-Auth**](../docs/proxy/token_auth.md)
- ✅ [**Prompt Injection Detection**](#prompt-injection-detection-lakeraai)
- ✅ [**Invite Team Members to access `/spend` Routes**](../docs/proxy/cost_tracking#allowing-non-proxy-admins-to-access-spend-endpoints)

View file

@ -38,7 +38,7 @@ class MyCustomHandler(CustomLogger):
print(f"On Async Success")
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Async Success")
print(f"On Async Failure")
customHandler = MyCustomHandler()

View file

@ -144,6 +144,26 @@ print(response)
```
You can also pass `metadata` as part of the request header with a `langfuse_*` prefix:
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--header 'langfuse_trace_id: trace-id22' \
--header 'langfuse_trace_user_id: user-id2' \
--header 'langfuse_trace_metadata: {"key":"value"}' \
--data '{
    "model": "gpt-3.5-turbo",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ]
}'
```
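If you call the proxy through the OpenAI Python SDK instead of curl, the same `langfuse_*` headers can be attached with the client's `extra_headers` parameter. A minimal sketch mirroring the curl example above (the proxy URL and API key are placeholders):
```python
import openai

client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what llm are you"}],
    # headers prefixed with `langfuse_` are forwarded to Langfuse by the proxy
    extra_headers={
        "langfuse_trace_id": "trace-id22",
        "langfuse_trace_user_id": "user-id2",
        "langfuse_trace_metadata": '{"key":"value"}',
    },
)
print(response)
```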
### Trace & Generation Parameters
#### Trace Specific Parameters

View file

@ -0,0 +1,3 @@
llmcord.py lets you and your friends chat with LLMs directly in your Discord server. It works with practically any LLM, remote or locally hosted.
GitHub: https://github.com/jakobdylanc/discord-llm-chatbot

View file

@ -46,13 +46,13 @@ for chunk in response:
## Supported Models - ALL Groq Models Supported!
We support ALL Groq models; just set `groq/` as a prefix when sending completion requests
| Model Name | Function Call |
|--------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| llama3-8b-8192 | `completion(model="groq/llama3-8b-8192", messages)` |
| llama3-70b-8192 | `completion(model="groq/llama3-70b-8192", messages)` |
| llama2-70b-4096 | `completion(model="groq/llama2-70b-4096", messages)` |
| Model Name | Function Call |
|--------------------|---------------------------------------------------------|
| llama3-8b-8192 | `completion(model="groq/llama3-8b-8192", messages)` |
| llama3-70b-8192 | `completion(model="groq/llama3-70b-8192", messages)` |
| llama2-70b-4096 | `completion(model="groq/llama2-70b-4096", messages)` |
| mixtral-8x7b-32768 | `completion(model="groq/mixtral-8x7b-32768", messages)` |
| gemma-7b-it | `completion(model="groq/gemma-7b-it", messages)` |
| gemma-7b-it | `completion(model="groq/gemma-7b-it", messages)` |
## Groq - Tool / Function Calling Example

View file

@ -26,52 +26,52 @@ Example TogetherAI Usage - Note: liteLLM supports all models deployed on Togethe
### Llama LLMs - Chat
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| togethercomputer/llama-2-70b-chat | `completion('together_ai/togethercomputer/llama-2-70b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|-------------------------------------------------------------------------|------------------------------------|
| togethercomputer/llama-2-70b-chat | `completion('together_ai/togethercomputer/llama-2-70b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Llama LLMs - Language / Instruct
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| togethercomputer/llama-2-70b | `completion('together_ai/togethercomputer/llama-2-70b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/LLaMA-2-7B-32K | `completion('together_ai/togethercomputer/LLaMA-2-7B-32K', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/Llama-2-7B-32K-Instruct | `completion('together_ai/togethercomputer/Llama-2-7B-32K-Instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/llama-2-7b | `completion('together_ai/togethercomputer/llama-2-7b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Model Name | Function Call | Required OS Variables |
|------------------------------------------|--------------------------------------------------------------------------------|------------------------------------|
| togethercomputer/llama-2-70b | `completion('together_ai/togethercomputer/llama-2-70b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/LLaMA-2-7B-32K | `completion('together_ai/togethercomputer/LLaMA-2-7B-32K', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/Llama-2-7B-32K-Instruct | `completion('together_ai/togethercomputer/Llama-2-7B-32K-Instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/llama-2-7b | `completion('together_ai/togethercomputer/llama-2-7b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Falcon LLMs
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| togethercomputer/falcon-40b-instruct | `completion('together_ai/togethercomputer/falcon-40b-instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/falcon-7b-instruct | `completion('together_ai/togethercomputer/falcon-7b-instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Model Name | Function Call | Required OS Variables |
|--------------------------------------|----------------------------------------------------------------------------|------------------------------------|
| togethercomputer/falcon-40b-instruct | `completion('together_ai/togethercomputer/falcon-40b-instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/falcon-7b-instruct | `completion('together_ai/togethercomputer/falcon-7b-instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Alpaca LLMs
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| togethercomputer/alpaca-7b | `completion('together_ai/togethercomputer/alpaca-7b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Model Name | Function Call | Required OS Variables |
|----------------------------|------------------------------------------------------------------|------------------------------------|
| togethercomputer/alpaca-7b | `completion('together_ai/togethercomputer/alpaca-7b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Other Chat LLMs
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| HuggingFaceH4/starchat-alpha | `completion('together_ai/HuggingFaceH4/starchat-alpha', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Model Name | Function Call | Required OS Variables |
|------------------------------|--------------------------------------------------------------------|------------------------------------|
| HuggingFaceH4/starchat-alpha | `completion('together_ai/HuggingFaceH4/starchat-alpha', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Code LLMs
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| togethercomputer/CodeLlama-34b | `completion('together_ai/togethercomputer/CodeLlama-34b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/CodeLlama-34b-Instruct | `completion('together_ai/togethercomputer/CodeLlama-34b-Instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/CodeLlama-34b-Python | `completion('together_ai/togethercomputer/CodeLlama-34b-Python', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| defog/sqlcoder | `completion('together_ai/defog/sqlcoder', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| NumbersStation/nsql-llama-2-7B | `completion('together_ai/NumbersStation/nsql-llama-2-7B', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardCoder-15B-V1.0 | `completion('together_ai/WizardLM/WizardCoder-15B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardCoder-Python-34B-V1.0 | `completion('together_ai/WizardLM/WizardCoder-Python-34B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Model Name | Function Call | Required OS Variables |
|-----------------------------------------|-------------------------------------------------------------------------------|------------------------------------|
| togethercomputer/CodeLlama-34b | `completion('together_ai/togethercomputer/CodeLlama-34b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/CodeLlama-34b-Instruct | `completion('together_ai/togethercomputer/CodeLlama-34b-Instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/CodeLlama-34b-Python | `completion('together_ai/togethercomputer/CodeLlama-34b-Python', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| defog/sqlcoder | `completion('together_ai/defog/sqlcoder', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| NumbersStation/nsql-llama-2-7B | `completion('together_ai/NumbersStation/nsql-llama-2-7B', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardCoder-15B-V1.0 | `completion('together_ai/WizardLM/WizardCoder-15B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardCoder-Python-34B-V1.0 | `completion('together_ai/WizardLM/WizardCoder-Python-34B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Language LLMs
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| NousResearch/Nous-Hermes-Llama2-13b | `completion('together_ai/NousResearch/Nous-Hermes-Llama2-13b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Austism/chronos-hermes-13b | `completion('together_ai/Austism/chronos-hermes-13b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| upstage/SOLAR-0-70b-16bit | `completion('together_ai/upstage/SOLAR-0-70b-16bit', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardLM-70B-V1.0 | `completion('together_ai/WizardLM/WizardLM-70B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Model Name | Function Call | Required OS Variables |
|-------------------------------------|---------------------------------------------------------------------------|------------------------------------|
| NousResearch/Nous-Hermes-Llama2-13b | `completion('together_ai/NousResearch/Nous-Hermes-Llama2-13b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Austism/chronos-hermes-13b | `completion('together_ai/Austism/chronos-hermes-13b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| upstage/SOLAR-0-70b-16bit | `completion('together_ai/upstage/SOLAR-0-70b-16bit', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardLM-70B-V1.0 | `completion('together_ai/WizardLM/WizardLM-70B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
## Prompt Templates

View file

@ -155,14 +155,14 @@ def default_pt(messages):
#### Models we already have Prompt Templates for
| Model Name | Works for Models | Function Call |
| -------- | -------- | -------- |
| meta-llama/Llama-2-7b-chat | All meta-llama llama2 chat models| `completion(model='vllm/meta-llama/Llama-2-7b', messages=messages, api_base="your_api_endpoint")` |
| tiiuae/falcon-7b-instruct | All falcon instruct models | `completion(model='vllm/tiiuae/falcon-7b-instruct', messages=messages, api_base="your_api_endpoint")` |
| mosaicml/mpt-7b-chat | All mpt chat models | `completion(model='vllm/mosaicml/mpt-7b-chat', messages=messages, api_base="your_api_endpoint")` |
| codellama/CodeLlama-34b-Instruct-hf | All codellama instruct models | `completion(model='vllm/codellama/CodeLlama-34b-Instruct-hf', messages=messages, api_base="your_api_endpoint")` |
| WizardLM/WizardCoder-Python-34B-V1.0 | All wizardcoder models | `completion(model='vllm/WizardLM/WizardCoder-Python-34B-V1.0', messages=messages, api_base="your_api_endpoint")` |
| Phind/Phind-CodeLlama-34B-v2 | All phind-codellama models | `completion(model='vllm/Phind/Phind-CodeLlama-34B-v2', messages=messages, api_base="your_api_endpoint")` |
| Model Name | Works for Models | Function Call |
|--------------------------------------|-----------------------------------|------------------------------------------------------------------------------------------------------------------|
| meta-llama/Llama-2-7b-chat | All meta-llama llama2 chat models | `completion(model='vllm/meta-llama/Llama-2-7b', messages=messages, api_base="your_api_endpoint")` |
| tiiuae/falcon-7b-instruct | All falcon instruct models | `completion(model='vllm/tiiuae/falcon-7b-instruct', messages=messages, api_base="your_api_endpoint")` |
| mosaicml/mpt-7b-chat | All mpt chat models | `completion(model='vllm/mosaicml/mpt-7b-chat', messages=messages, api_base="your_api_endpoint")` |
| codellama/CodeLlama-34b-Instruct-hf | All codellama instruct models | `completion(model='vllm/codellama/CodeLlama-34b-Instruct-hf', messages=messages, api_base="your_api_endpoint")` |
| WizardLM/WizardCoder-Python-34B-V1.0 | All wizardcoder models | `completion(model='vllm/WizardLM/WizardCoder-Python-34B-V1.0', messages=messages, api_base="your_api_endpoint")` |
| Phind/Phind-CodeLlama-34B-v2 | All phind-codellama models | `completion(model='vllm/Phind/Phind-CodeLlama-34B-v2', messages=messages, api_base="your_api_endpoint")` |
#### Custom prompt templates

View file

@ -251,23 +251,23 @@ response = completion(
Here are some examples of models available in IBM watsonx.ai that you can use with LiteLLM:
| Model Name | Command |
| ---------- | --------- |
| Flan T5 XXL | `completion(model="watsonx/google/flan-t5-xxl", messages=messages)` |
| Flan Ul2 | `completion(model="watsonx/google/flan-ul2", messages=messages)` |
| Mt0 XXL | `completion(model="watsonx/bigscience/mt0-xxl", messages=messages)` |
| Gpt Neox | `completion(model="watsonx/eleutherai/gpt-neox-20b", messages=messages)` |
| Mpt 7B Instruct2 | `completion(model="watsonx/ibm/mpt-7b-instruct2", messages=messages)` |
| Starcoder | `completion(model="watsonx/bigcode/starcoder", messages=messages)` |
| Llama 2 70B Chat | `completion(model="watsonx/meta-llama/llama-2-70b-chat", messages=messages)` |
| Llama 2 13B Chat | `completion(model="watsonx/meta-llama/llama-2-13b-chat", messages=messages)` |
| Granite 13B Instruct | `completion(model="watsonx/ibm/granite-13b-instruct-v1", messages=messages)` |
| Granite 13B Chat | `completion(model="watsonx/ibm/granite-13b-chat-v1", messages=messages)` |
| Flan T5 XL | `completion(model="watsonx/google/flan-t5-xl", messages=messages)` |
| Granite 13B Chat V2 | `completion(model="watsonx/ibm/granite-13b-chat-v2", messages=messages)` |
| Granite 13B Instruct V2 | `completion(model="watsonx/ibm/granite-13b-instruct-v2", messages=messages)` |
| Elyza Japanese Llama 2 7B Instruct | `completion(model="watsonx/elyza/elyza-japanese-llama-2-7b-instruct", messages=messages)` |
| Mixtral 8X7B Instruct V01 Q | `completion(model="watsonx/ibm-mistralai/mixtral-8x7b-instruct-v01-q", messages=messages)` |
| Model Name                         | Command                                                                                    |
|------------------------------------|--------------------------------------------------------------------------------------------|
| Flan T5 XXL                        | `completion(model="watsonx/google/flan-t5-xxl", messages=messages)`                        |
| Flan Ul2                           | `completion(model="watsonx/google/flan-ul2", messages=messages)`                           |
| Mt0 XXL                            | `completion(model="watsonx/bigscience/mt0-xxl", messages=messages)`                        |
| Gpt Neox                           | `completion(model="watsonx/eleutherai/gpt-neox-20b", messages=messages)`                   |
| Mpt 7B Instruct2                   | `completion(model="watsonx/ibm/mpt-7b-instruct2", messages=messages)`                      |
| Starcoder                          | `completion(model="watsonx/bigcode/starcoder", messages=messages)`                         |
| Llama 2 70B Chat                   | `completion(model="watsonx/meta-llama/llama-2-70b-chat", messages=messages)`               |
| Llama 2 13B Chat                   | `completion(model="watsonx/meta-llama/llama-2-13b-chat", messages=messages)`               |
| Granite 13B Instruct               | `completion(model="watsonx/ibm/granite-13b-instruct-v1", messages=messages)`               |
| Granite 13B Chat                   | `completion(model="watsonx/ibm/granite-13b-chat-v1", messages=messages)`                   |
| Flan T5 XL                         | `completion(model="watsonx/google/flan-t5-xl", messages=messages)`                         |
| Granite 13B Chat V2                | `completion(model="watsonx/ibm/granite-13b-chat-v2", messages=messages)`                   |
| Granite 13B Instruct V2            | `completion(model="watsonx/ibm/granite-13b-instruct-v2", messages=messages)`               |
| Elyza Japanese Llama 2 7B Instruct | `completion(model="watsonx/elyza/elyza-japanese-llama-2-7b-instruct", messages=messages)`  |
| Mixtral 8X7B Instruct V01 Q        | `completion(model="watsonx/ibm-mistralai/mixtral-8x7b-instruct-v01-q", messages=messages)` |
For a list of all available models in watsonx.ai, see [here](https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/fm-models.html?context=wx&locale=en&audience=wdp).
@ -275,10 +275,10 @@ For a list of all available models in watsonx.ai, see [here](https://dataplatfor
## Supported IBM watsonx.ai Embedding Models
| Model Name | Function Call |
|----------------------|---------------------------------------------|
| Slate 30m | `embedding(model="watsonx/ibm/slate-30m-english-rtrvr", input=input)` |
| Slate 125m | `embedding(model="watsonx/ibm/slate-125m-english-rtrvr", input=input)` |
| Model Name | Function Call |
|------------|------------------------------------------------------------------------|
| Slate 30m | `embedding(model="watsonx/ibm/slate-30m-english-rtrvr", input=input)` |
| Slate 125m | `embedding(model="watsonx/ibm/slate-125m-english-rtrvr", input=input)` |
For a list of all available embedding models in watsonx.ai, see [here](https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/fm-models-embed.html?context=wx).

View file

@ -37,26 +37,26 @@ print(response)
## Supported Models
All models listed here https://inference.readthedocs.io/en/latest/models/builtin/embedding/index.html are supported (see the usage example after the table)
| Model Name | Function Call |
|------------------------------|--------------------------------------------------------|
| bge-base-en | `embedding(model="xinference/bge-base-en", input)` |
| bge-base-en-v1.5 | `embedding(model="xinference/bge-base-en-v1.5", input)` |
| bge-base-zh | `embedding(model="xinference/bge-base-zh", input)` |
| bge-base-zh-v1.5 | `embedding(model="xinference/bge-base-zh-v1.5", input)` |
| bge-large-en | `embedding(model="xinference/bge-large-en", input)` |
| bge-large-en-v1.5 | `embedding(model="xinference/bge-large-en-v1.5", input)` |
| bge-large-zh | `embedding(model="xinference/bge-large-zh", input)` |
| bge-large-zh-noinstruct | `embedding(model="xinference/bge-large-zh-noinstruct", input)` |
| bge-large-zh-v1.5 | `embedding(model="xinference/bge-large-zh-v1.5", input)` |
| bge-small-en-v1.5 | `embedding(model="xinference/bge-small-en-v1.5", input)` |
| bge-small-zh | `embedding(model="xinference/bge-small-zh", input)` |
| bge-small-zh-v1.5 | `embedding(model="xinference/bge-small-zh-v1.5", input)` |
| e5-large-v2 | `embedding(model="xinference/e5-large-v2", input)` |
| gte-base | `embedding(model="xinference/gte-base", input)` |
| gte-large | `embedding(model="xinference/gte-large", input)` |
| jina-embeddings-v2-base-en | `embedding(model="xinference/jina-embeddings-v2-base-en", input)` |
| jina-embeddings-v2-small-en | `embedding(model="xinference/jina-embeddings-v2-small-en", input)` |
| multilingual-e5-large | `embedding(model="xinference/multilingual-e5-large", input)` |
| Model Name | Function Call |
|-----------------------------|--------------------------------------------------------------------|
| bge-base-en | `embedding(model="xinference/bge-base-en", input)` |
| bge-base-en-v1.5 | `embedding(model="xinference/bge-base-en-v1.5", input)` |
| bge-base-zh | `embedding(model="xinference/bge-base-zh", input)` |
| bge-base-zh-v1.5 | `embedding(model="xinference/bge-base-zh-v1.5", input)` |
| bge-large-en | `embedding(model="xinference/bge-large-en", input)` |
| bge-large-en-v1.5 | `embedding(model="xinference/bge-large-en-v1.5", input)` |
| bge-large-zh | `embedding(model="xinference/bge-large-zh", input)` |
| bge-large-zh-noinstruct | `embedding(model="xinference/bge-large-zh-noinstruct", input)` |
| bge-large-zh-v1.5 | `embedding(model="xinference/bge-large-zh-v1.5", input)` |
| bge-small-en-v1.5 | `embedding(model="xinference/bge-small-en-v1.5", input)` |
| bge-small-zh | `embedding(model="xinference/bge-small-zh", input)` |
| bge-small-zh-v1.5 | `embedding(model="xinference/bge-small-zh-v1.5", input)` |
| e5-large-v2 | `embedding(model="xinference/e5-large-v2", input)` |
| gte-base | `embedding(model="xinference/gte-base", input)` |
| gte-large | `embedding(model="xinference/gte-large", input)` |
| jina-embeddings-v2-base-en | `embedding(model="xinference/jina-embeddings-v2-base-en", input)` |
| jina-embeddings-v2-small-en | `embedding(model="xinference/jina-embeddings-v2-small-en", input)` |
| multilingual-e5-large | `embedding(model="xinference/multilingual-e5-large", input)` |
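For example, a minimal embedding call against a locally running Xinference server; the `api_base` value is an assumption here and should point at your own deployment:
```python
from litellm import embedding

response = embedding(
    model="xinference/bge-base-en",
    api_base="http://127.0.0.1:9997/v1",  # assumed local Xinference endpoint
    input=["good morning from litellm"],
)
print(response)
```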

View file

@ -260,7 +260,7 @@ Requirements:
<TabItem value="docker-deploy" label="Dockerfile">
We maintain a [seperate Dockerfile](https://github.com/BerriAI/litellm/pkgs/container/litellm-database) for reducing build time when running LiteLLM proxy with a connected Postgres Database
We maintain a [separate Dockerfile](https://github.com/BerriAI/litellm/pkgs/container/litellm-database) for reducing build time when running LiteLLM proxy with a connected Postgres Database
```shell
docker pull ghcr.io/berriai/litellm-database:main-latest

View file

@ -2,30 +2,213 @@ import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# ✨ Enterprise Features - Content Mod, SSO, Custom Swagger
# ✨ Enterprise Features - SSO, Audit Logs, Guardrails
Features here are behind a commercial license in our `/enterprise` folder. [**See Code**](https://github.com/BerriAI/litellm/tree/main/enterprise)
:::tip
:::info
[Get Started with Enterprise here](https://github.com/BerriAI/litellm/tree/main/enterprise)
Get in touch with us [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
:::
Features:
- ✅ [SSO for Admin UI](./ui.md#✨-enterprise-features)
- ✅ Content Moderation with LLM Guard, LlamaGuard, Google Text Moderations
- ✅ [Prompt Injection Detection (with LakeraAI API)](#prompt-injection-detection-lakeraai)
- ✅ [Audit Logs](#audit-logs)
- ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags)
- ✅ [Content Moderation with LLM Guard, LlamaGuard, Google Text Moderations](#content-moderation)
- ✅ [Prompt Injection Detection (with LakeraAI API)](#prompt-injection-detection---lakeraai)
- ✅ [Custom Branding + Routes on Swagger Docs](#swagger-docs---custom-routes--branding)
- ✅ Reject calls from Blocked User list
- ✅ Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
- ✅ Don't log/store specific requests to Langfuse, Sentry, etc. (eg confidential LLM requests)
- ✅ Tracking Spend for Custom Tags
- ✅ Custom Branding + Routes on Swagger Docs
- ✅ Audit Logs for `Created At, Created By` when Models Added
## Audit Logs
Store audit logs for **Create, Update, Delete Operations** done on `Teams` and `Virtual Keys`
**Step 1** Switch on audit logs
```yaml
litellm_settings:
  store_audit_logs: true
```
Start the litellm proxy with this config
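For example, assuming the settings above are saved to `config.yaml`:
```shell
litellm --config /path/to/config.yaml
```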
**Step 2** Test it - Create a Team
```shell
curl --location 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"max_budget": 2
}'
```
**Step 3** Expected Log
```json
{
  "id": "e1760e10-4264-4499-82cd-c08c86c8d05b",
  "updated_at": "2024-06-06T02:10:40.836420+00:00",
  "changed_by": "109010464461339474872",
  "action": "created",
  "table_name": "LiteLLM_TeamTable",
  "object_id": "82e725b5-053f-459d-9a52-867191635446",
  "before_value": null,
  "updated_values": {
    "team_id": "82e725b5-053f-459d-9a52-867191635446",
    "admins": [],
    "members": [],
    "members_with_roles": [
      {
        "role": "admin",
        "user_id": "109010464461339474872"
      }
    ],
    "max_budget": 2.0,
    "models": [],
    "blocked": false
  }
}
```
## Tracking Spend for Custom Tags
Requirements:
- Virtual Keys & a database should be set up; see [virtual keys](https://docs.litellm.ai/docs/proxy/virtual_keys)
#### Usage - /chat/completions requests with request tags
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
Set `extra_body={"metadata": { }}` to the `metadata` you want to pass
```python
import openai

client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "metadata": {
            "tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]
        }
    }
)

print(response)
```
</TabItem>
<TabItem value="Curl" label="Curl Request">
Pass `metadata` as part of the request body
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "gpt-3.5-turbo",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ],
    "metadata": {"tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]}
}'
```
</TabItem>
<TabItem value="langchain" label="Langchain">
```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model="gpt-3.5-turbo",
    temperature=0.1,
    extra_body={
        "metadata": {
            "tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]
        }
    }
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that I'm using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)
print(response)
```
</TabItem>
</Tabs>
#### Viewing Spend per tag
#### `/spend/tags` Request Format
```shell
curl -X GET "http://0.0.0.0:4000/spend/tags" \
-H "Authorization: Bearer sk-1234"
```
#### `/spend/tags` Response Format
```json
[
  {
    "individual_request_tag": "model-anthropic-claude-v2.1",
    "log_count": 6,
    "total_spend": 0.000672
  },
  {
    "individual_request_tag": "app-ishaan-local",
    "log_count": 4,
    "total_spend": 0.000448
  },
  {
    "individual_request_tag": "app-ishaan-prod",
    "log_count": 2,
    "total_spend": 0.000224
  }
]
```
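To inspect this programmatically, you can call the endpoint from a script and aggregate the results. A minimal sketch using `requests` (the proxy URL and key match the examples above):
```python
import requests

resp = requests.get(
    "http://0.0.0.0:4000/spend/tags",
    headers={"Authorization": "Bearer sk-1234"},
)
resp.raise_for_status()

# print a per-tag breakdown, then the total spend across all tags
tags = resp.json()
for tag in tags:
    print(f'{tag["individual_request_tag"]}: ${tag["total_spend"]:.6f} ({tag["log_count"]} logs)')
print("total:", sum(t["total_spend"] for t in tags))
```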
## Content Moderation
### Content Moderation with LLM Guard
#### Content Moderation with LLM Guard
Set the LLM Guard API Base in your environment
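For example, if LLM Guard is running locally (the `LLM_GUARD_API_BASE` variable name and the port are assumptions; adjust to your deployment):
```shell
export LLM_GUARD_API_BASE="http://0.0.0.0:8192"
```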
@ -160,7 +343,7 @@ curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
</TabItem>
</Tabs>
### Content Moderation with LlamaGuard
#### Content Moderation with LlamaGuard
Currently works with Sagemaker's LlamaGuard endpoint.
@ -194,7 +377,7 @@ callbacks: ["llamaguard_moderations"]
### Content Moderation with Google Text Moderation
#### Content Moderation with Google Text Moderation
Requires your GOOGLE_APPLICATION_CREDENTIALS to be set in your .env (same as VertexAI).
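For example, pointing it at a service-account JSON file (the path is a placeholder):
```shell
export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account.json"
```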
@ -250,7 +433,7 @@ Here are the category specific values:
### Content Moderation with OpenAI Moderations
#### Content Moderation with OpenAI Moderations
Use this if you want to reject /chat, /completions, /embeddings calls that fail OpenAI Moderations checks
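A sketch of the config, assuming the callback name follows the same pattern as the other moderation hooks above (`openai_moderations` is an assumption):
```yaml
litellm_settings:
  callbacks: ["openai_moderations"] # assumed name, mirroring "llamaguard_moderations" above
```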
@ -276,7 +459,7 @@ Step 1 Set a `LAKERA_API_KEY` in your env
LAKERA_API_KEY="7a91a1a6059da*******"
```
Step 2. Add `lakera_prompt_injection` to your calbacks
Step 2. Add `lakera_prompt_injection` to your callbacks
```yaml
litellm_settings:
@ -302,6 +485,42 @@ curl --location 'http://localhost:4000/chat/completions' \
}'
```
## Swagger Docs - Custom Routes + Branding
:::info
Requires a LiteLLM Enterprise key to use. Get a free 2-week license [here](https://forms.gle/sTDVprBs18M4V8Le8)
:::
Set LiteLLM Key in your environment
```bash
LITELLM_LICENSE=""
```
#### Customize Title + Description
In your environment, set:
```bash
DOCS_TITLE="TotalGPT"
DOCS_DESCRIPTION="Sample Company Description"
```
#### Customize Routes
Hide admin routes from users.
In your environment, set:
```bash
DOCS_FILTERED="True" # only shows openai routes to user
```
<Image img={require('../../img/custom_swagger.png')} style={{ width: '900px', height: 'auto' }} />
## Enable Blocked User Lists
If any call is made to the proxy with this user id, it'll be rejected - use this if you want to let users opt out of AI features
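A sketch of what enabling this could look like; the `blocked_user_check` callback and `blocked_user_list` setting names are assumptions based on the feature description:
```yaml
litellm_settings:
  callbacks: ["blocked_user_check"] # assumed callback name
  blocked_user_list: ["user_id_1", "user_id_2"] # calls with these user ids are rejected
```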
@ -417,176 +636,6 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
}
'
```
## Tracking Spend for Custom Tags
Requirements:
- Virtual Keys & a database should be set up; see [virtual keys](https://docs.litellm.ai/docs/proxy/virtual_keys)
### Usage - /chat/completions requests with request tags
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
Set `extra_body={"metadata": { }}` to the `metadata` you want to pass
```python
import openai

client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "metadata": {
            "tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]
        }
    }
)

print(response)
```
</TabItem>
<TabItem value="Curl" label="Curl Request">
Pass `metadata` as part of the request body
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "gpt-3.5-turbo",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ],
    "metadata": {"tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]}
}'
```
</TabItem>
<TabItem value="langchain" label="Langchain">
```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model="gpt-3.5-turbo",
    temperature=0.1,
    extra_body={
        "metadata": {
            "tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]
        }
    }
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that I'm using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)
print(response)
```
</TabItem>
</Tabs>
### Viewing Spend per tag
#### `/spend/tags` Request Format
```shell
curl -X GET "http://0.0.0.0:4000/spend/tags" \
-H "Authorization: Bearer sk-1234"
```
#### `/spend/tags` Response Format
```json
[
  {
    "individual_request_tag": "model-anthropic-claude-v2.1",
    "log_count": 6,
    "total_spend": 0.000672
  },
  {
    "individual_request_tag": "app-ishaan-local",
    "log_count": 4,
    "total_spend": 0.000448
  },
  {
    "individual_request_tag": "app-ishaan-prod",
    "log_count": 2,
    "total_spend": 0.000224
  }
]
```
<!-- ## Tracking Spend per Key
## Tracking Spend per User -->
## Swagger Docs - Custom Routes + Branding
:::info
Requires a LiteLLM Enterprise key to use. Get a free 2-week license [here](https://forms.gle/sTDVprBs18M4V8Le8)
:::
Set LiteLLM Key in your environment
```bash
LITELLM_LICENSE=""
```
### Customize Title + Description
In your environment, set:
```bash
DOCS_TITLE="TotalGPT"
DOCS_DESCRIPTION="Sample Company Description"
```
### Customize Routes
Hide admin routes from users.
In your environment, set:
```bash
DOCS_FILTERED="True" # only shows openai routes to user
```
<Image img={require('../../img/custom_swagger.png')} style={{ width: '900px', height: 'auto' }} />
## Public Model Hub

View file

@ -41,7 +41,9 @@ litellm_settings:
**Step 3**: Set required env variables for logging to langfuse
```shell
export LANGFUSE_PUBLIC_KEY="pk_kk"
export LANGFUSE_SECRET_KEY="sk_ss
export LANGFUSE_SECRET_KEY="sk_ss"
# Optional, defaults to https://cloud.langfuse.com
export LANGFUSE_HOST="https://xxx.langfuse.com"
```
**Step 4**: Start the proxy, make a test request

View file

@ -100,4 +100,76 @@ print(response)
```
</TabItem>
</Tabs>
</Tabs>
## Advanced - Redis Caching
Use Redis caching to do request prioritization across multiple instances of LiteLLM.
### SDK
```python
import asyncio
import os

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "mock_response": "Hello world this is Macintosh!",  # fakes the LLM API call
                "rpm": 1,
            },
        },
    ],
    ### REDIS PARAMS ###
    redis_host=os.environ["REDIS_HOST"],
    redis_password=os.environ["REDIS_PASSWORD"],
    redis_port=os.environ["REDIS_PORT"],
)

async def main():
    try:
        _response = await router.schedule_acompletion(  # 👈 ADDS TO QUEUE + POLLS + MAKES CALL
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hey!"}],
            priority=0,  # 👈 LOWER IS BETTER
        )
    except Exception as e:
        print("didn't make request")

asyncio.run(main())
```
### PROXY
```yaml
model_list:
  - model_name: gpt-3.5-turbo-fake-model
    litellm_params:
      model: gpt-3.5-turbo
      mock_response: "hello world!"
      api_key: my-good-key

router_settings:
  redis_host: os.environ/REDIS_HOST
  redis_password: os.environ/REDIS_PASSWORD
  redis_port: os.environ/REDIS_PORT
```
```bash
$ litellm --config /path/to/config.yaml
# RUNNING on http://0.0.0.0:4000
```
```bash
curl -X POST 'http://localhost:4000/queue/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
    "model": "gpt-3.5-turbo-fake-model",
    "messages": [
        {
            "role": "user",
            "content": "what is the meaning of the universe? 1234"
        }
    ],
    "priority": 0 👈 SET VALUE HERE
}'
```

View file

@ -1,11 +1,31 @@
# Secret Manager
LiteLLM supports reading secrets from Azure Key Vault and Infisical
- AWS Key Management Service
- AWS Secret Manager
- [Azure Key Vault](#azure-key-vault)
- Google Key Management Service
- [Infisical Secret Manager](#infisical-secret-manager)
- [.env Files](#env-files)
## AWS Key Management Service
Use AWS KMS to store an encrypted copy of your Proxy Master Key in the environment.
```bash
export LITELLM_MASTER_KEY="djZ9xjVaZ..." # 👈 ENCRYPTED KEY
export AWS_REGION_NAME="us-west-2"
```
```yaml
general_settings:
  key_management_system: "aws_kms"
  key_management_settings:
    hosted_keys: ["LITELLM_MASTER_KEY"] # 👈 WHICH KEYS ARE STORED ON KMS
```
[**See Decryption Code**](https://github.com/BerriAI/litellm/blob/a2da2a8f168d45648b61279d4795d647d94f90c9/litellm/utils.py#L10182)
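To produce the encrypted value in the first place, you can encrypt your plaintext master key with the AWS CLI. A sketch (the KMS key alias is hypothetical):
```shell
echo -n "sk-1234" > /tmp/master_key
aws kms encrypt \
  --key-id alias/litellm-master-key \
  --plaintext fileb:///tmp/master_key \
  --output text \
  --query CiphertextBlob
```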
## AWS Secret Manager
Store your proxy keys in AWS Secret Manager.
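A sketch of the config, assuming it follows the same `key_management_system` pattern shown above for KMS:
```yaml
general_settings:
  key_management_system: "aws_secret_manager" # assumed value, mirroring "aws_kms" above
```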

View file

@ -1,8 +1,8 @@
# Using Fine-Tuned gpt-3.5-turbo
LiteLLM allows you to call `completion` with your fine-tuned gpt-3.5-turbo models
If you're trying to create your custom finetuned gpt-3.5-turbo model following along on this tutorial: https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset
If you're trying to create your custom fine-tuned gpt-3.5-turbo model following along on this tutorial: https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset
Once you've created your fine tuned model, you can call it with `litellm.completion()`
Once you've created your fine-tuned model, you can call it with `litellm.completion()`
## Usage
```python

View file

@ -5975,9 +5975,9 @@
}
},
"node_modules/caniuse-lite": {
"version": "1.0.30001519",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001519.tgz",
"integrity": "sha512-0QHgqR+Jv4bxHMp8kZ1Kn8CH55OikjKJ6JmKkZYP1F3D7w+lnFXF70nG5eNfsZS89jadi5Ywy5UCSKLAglIRkg==",
"version": "1.0.30001629",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001629.tgz",
"integrity": "sha512-c3dl911slnQhmxUIT4HhYzT7wnBK/XYpGnYLOj4nJBaRiw52Ibe7YxlDaAeRECvA786zCuExhxIUJ2K7nHMrBw==",
"funding": [
{
"type": "opencollective",

View file

@ -36,6 +36,7 @@ const sidebars = {
label: "📖 All Endpoints (Swagger)",
href: "https://litellm-api.up.railway.app/",
},
"proxy/enterprise",
"proxy/demo",
"proxy/configs",
"proxy/reliability",
@ -45,7 +46,6 @@ const sidebars = {
"proxy/customers",
"proxy/billing",
"proxy/user_keys",
"proxy/enterprise",
"proxy/virtual_keys",
"proxy/alerting",
{

File diff suppressed because it is too large

View file

@ -18,10 +18,6 @@ async def log_event(request: Request):
return {"message": "Request received successfully"}
except Exception as e:
print(f"Error processing request: {str(e)}")
import traceback
traceback.print_exc()
raise HTTPException(status_code=500, detail="Internal Server Error")

View file

@ -120,6 +120,5 @@ class GenericAPILogger:
)
return response
except Exception as e:
traceback.print_exc()
verbose_logger.debug(f"Generic - {str(e)}\n{traceback.format_exc()}")
verbose_logger.error(f"Generic - {str(e)}\n{traceback.format_exc()}")
pass

View file

@ -82,7 +82,7 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
except HTTPException as e:
raise e
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(traceback.format_exc())
async def async_post_call_success_hook(
self,

View file

@ -118,4 +118,4 @@ class _ENTERPRISE_BlockedUserList(CustomLogger):
except HTTPException as e:
raise e
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(traceback.format_exc())

View file

@ -92,7 +92,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
},
)
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(traceback.format_exc())
raise e
def should_proceed(self, user_api_key_dict: UserAPIKeyAuth, data: dict) -> bool:

View file

@ -60,6 +60,7 @@ _async_failure_callback: List[Callable] = (
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False
store_audit_logs = False # Enterprise feature, allow users to see audit logs
## end of callbacks #############
email: Optional[str] = (
@ -808,6 +809,7 @@ from .exceptions import (
APIConnectionError,
APIResponseValidationError,
UnprocessableEntityError,
InternalServerError,
LITELLM_EXCEPTION_TYPES,
)
from .budget_manager import BudgetManager

View file

@ -1,5 +1,6 @@
import logging, os, json
from logging import Formatter
import traceback
set_verbose = False
json_logs = bool(os.getenv("JSON_LOGS", False))

View file

@ -253,7 +253,6 @@ class RedisCache(BaseCache):
str(e),
value,
)
traceback.print_exc()
raise e
async def async_scan_iter(self, pattern: str, count: int = 100) -> list:
@ -313,7 +312,6 @@ class RedisCache(BaseCache):
str(e),
value,
)
traceback.print_exc()
key = self.check_and_fix_namespace(key=key)
async with _redis_client as redis_client:
@ -352,7 +350,6 @@ class RedisCache(BaseCache):
str(e),
value,
)
traceback.print_exc()
async def async_set_cache_pipeline(self, cache_list, ttl=None):
"""
@ -413,7 +410,6 @@ class RedisCache(BaseCache):
str(e),
cache_value,
)
traceback.print_exc()
async def batch_cache_write(self, key, value, **kwargs):
print_verbose(
@ -458,7 +454,6 @@ class RedisCache(BaseCache):
str(e),
value,
)
traceback.print_exc()
raise e
async def flush_cache_buffer(self):
@ -495,8 +490,9 @@ class RedisCache(BaseCache):
return self._get_cache_logic(cached_response=cached_response)
except Exception as e:
# NON blocking - notify users Redis is throwing an exception
traceback.print_exc()
logging.debug("LiteLLM Caching: get() - Got exception from REDIS: ", e)
verbose_logger.error(
"LiteLLM Caching: get() - Got exception from REDIS: ", e
)
def batch_get_cache(self, key_list) -> dict:
"""
@ -646,10 +642,9 @@ class RedisCache(BaseCache):
error=e,
call_type="sync_ping",
)
print_verbose(
verbose_logger.error(
f"LiteLLM Redis Cache PING: - Got exception from REDIS : {str(e)}"
)
traceback.print_exc()
raise e
async def ping(self) -> bool:
@ -683,10 +678,9 @@ class RedisCache(BaseCache):
call_type="async_ping",
)
)
print_verbose(
verbose_logger.error(
f"LiteLLM Redis Cache PING: - Got exception from REDIS : {str(e)}"
)
traceback.print_exc()
raise e
async def delete_cache_keys(self, keys):
@ -1138,22 +1132,23 @@ class S3Cache(BaseCache):
cached_response = ast.literal_eval(cached_response)
if type(cached_response) is not dict:
cached_response = dict(cached_response)
print_verbose(
verbose_logger.debug(
f"Got S3 Cache: key: {key}, cached_response {cached_response}. Type Response {type(cached_response)}"
)
return cached_response
except botocore.exceptions.ClientError as e:
if e.response["Error"]["Code"] == "NoSuchKey":
print_verbose(
verbose_logger.error(
f"S3 Cache: The specified key '{key}' does not exist in the S3 bucket."
)
return None
except Exception as e:
# NON blocking - notify users S3 is throwing an exception
traceback.print_exc()
print_verbose(f"S3 Caching: get_cache() - Got exception from S3: {e}")
verbose_logger.error(
f"S3 Caching: get_cache() - Got exception from S3: {e}"
)
async def async_get_cache(self, key, **kwargs):
return self.get_cache(key=key, **kwargs)
@ -1234,8 +1229,7 @@ class DualCache(BaseCache):
return result
except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
traceback.print_exc()
verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
raise e
def get_cache(self, key, local_only: bool = False, **kwargs):
@ -1262,7 +1256,7 @@ class DualCache(BaseCache):
print_verbose(f"get cache: cache result: {result}")
return result
except Exception as e:
traceback.print_exc()
verbose_logger.error(traceback.format_exc())
def batch_get_cache(self, keys: list, local_only: bool = False, **kwargs):
try:
@ -1295,7 +1289,7 @@ class DualCache(BaseCache):
print_verbose(f"async batch get cache: cache result: {result}")
return result
except Exception as e:
traceback.print_exc()
verbose_logger.error(traceback.format_exc())
async def async_get_cache(self, key, local_only: bool = False, **kwargs):
# Try to fetch from in-memory cache first
@ -1328,7 +1322,7 @@ class DualCache(BaseCache):
print_verbose(f"get cache: cache result: {result}")
return result
except Exception as e:
traceback.print_exc()
verbose_logger.error(traceback.format_exc())
async def async_batch_get_cache(
self, keys: list, local_only: bool = False, **kwargs
@ -1368,7 +1362,7 @@ class DualCache(BaseCache):
return result
except Exception as e:
traceback.print_exc()
verbose_logger.error(traceback.format_exc())
async def async_set_cache(self, key, value, local_only: bool = False, **kwargs):
print_verbose(
@ -1381,8 +1375,8 @@ class DualCache(BaseCache):
if self.redis_cache is not None and local_only == False:
await self.redis_cache.async_set_cache(key, value, **kwargs)
except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
traceback.print_exc()
verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
verbose_logger.debug(traceback.format_exc())
async def async_batch_set_cache(
self, cache_list: list, local_only: bool = False, **kwargs
@ -1404,8 +1398,8 @@ class DualCache(BaseCache):
cache_list=cache_list, ttl=kwargs.get("ttl", None)
)
except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
traceback.print_exc()
verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
verbose_logger.debug(traceback.format_exc())
async def async_increment_cache(
self, key, value: float, local_only: bool = False, **kwargs
@ -1429,8 +1423,8 @@ class DualCache(BaseCache):
return result
except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
traceback.print_exc()
verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
verbose_logger.debug(traceback.format_exc())
raise e
def flush_cache(self):
@ -1846,8 +1840,8 @@ class Cache:
)
self.cache.set_cache(cache_key, cached_data, **kwargs)
except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
traceback.print_exc()
verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
verbose_logger.debug(traceback.format_exc())
pass
async def async_add_cache(self, result, *args, **kwargs):
@ -1864,8 +1858,8 @@ class Cache:
)
await self.cache.async_set_cache(cache_key, cached_data, **kwargs)
except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
traceback.print_exc()
verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
verbose_logger.debug(traceback.format_exc())
async def async_add_cache_pipeline(self, result, *args, **kwargs):
"""
@ -1897,8 +1891,8 @@ class Cache:
)
await asyncio.gather(*tasks)
except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
traceback.print_exc()
verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
verbose_logger.debug(traceback.format_exc())
async def batch_cache_write(self, result, *args, **kwargs):
cache_key, cached_data, kwargs = self._add_cache_logic(

View file

@ -638,6 +638,7 @@ LITELLM_EXCEPTION_TYPES = [
APIConnectionError,
APIResponseValidationError,
OpenAIError,
InternalServerError,
]

View file

@ -169,6 +169,5 @@ class AISpendLogger:
print_verbose(f"AISpend Logging - final data object: {data}")
except:
# traceback.print_exc()
print_verbose(f"AISpend Logging Error - {traceback.format_exc()}")
pass

View file

@ -178,6 +178,5 @@ class BerriSpendLogger:
print_verbose(f"BerriSpend Logging - final data object: {data}")
response = requests.post(url, headers=headers, json=data)
except:
# traceback.print_exc()
print_verbose(f"BerriSpend Logging Error - {traceback.format_exc()}")
pass

View file

@ -297,6 +297,5 @@ class ClickhouseLogger:
# make request to endpoint with payload
verbose_logger.debug(f"Clickhouse Logger - final response = {response}")
except Exception as e:
traceback.print_exc()
verbose_logger.debug(f"Clickhouse - {str(e)}\n{traceback.format_exc()}")
pass

View file

@ -115,7 +115,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
)
print_verbose(f"Custom Logger - model call details: {kwargs}")
except:
traceback.print_exc()
print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
async def async_log_input_event(
@ -130,7 +129,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
)
print_verbose(f"Custom Logger - model call details: {kwargs}")
except:
traceback.print_exc()
print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
def log_event(
@ -146,7 +144,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
end_time,
)
except:
# traceback.print_exc()
print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
pass
@ -163,6 +160,5 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
end_time,
)
except:
# traceback.print_exc()
print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
pass

View file

@ -134,7 +134,6 @@ class DataDogLogger:
f"Datadog Layer Logging - final response object: {response_obj}"
)
except Exception as e:
traceback.print_exc()
verbose_logger.debug(
f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}"
)

View file

@ -85,6 +85,5 @@ class DyanmoDBLogger:
)
return response
except:
traceback.print_exc()
print_verbose(f"DynamoDB Layer Error - {traceback.format_exc()}")
pass

View file

@ -112,6 +112,5 @@ class HeliconeLogger:
)
print_verbose(f"Helicone Logging - Error {response.text}")
except:
# traceback.print_exc()
print_verbose(f"Helicone Logging Error - {traceback.format_exc()}")
pass

View file

@ -69,6 +69,43 @@ class LangFuseLogger:
else:
self.upstream_langfuse = None
@staticmethod
def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
"""
Adds metadata from proxy request headers to Langfuse logging if keys start with "langfuse_"
and overwrites litellm_params.metadata if already included.
For example if you want to append your trace to an existing `trace_id` via header, send
`headers: { ..., langfuse_existing_trace_id: your-existing-trace-id }` via proxy request.
"""
if litellm_params is None:
return metadata
if litellm_params.get("proxy_server_request") is None:
return metadata
if metadata is None:
metadata = {}
proxy_headers = (
litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
)
for metadata_param_key in proxy_headers:
if metadata_param_key.startswith("langfuse_"):
trace_param_key = metadata_param_key.replace("langfuse_", "", 1)
if trace_param_key in metadata:
verbose_logger.warning(
f"Overwriting Langfuse `{trace_param_key}` from request header"
)
else:
verbose_logger.debug(
f"Found Langfuse `{trace_param_key}` in request header"
)
metadata[trace_param_key] = proxy_headers.get(metadata_param_key)
return metadata
# def log_error(kwargs, response_obj, start_time, end_time):
# generation = trace.generation(
# level ="ERROR" # can be any of DEBUG, DEFAULT, WARNING or ERROR
@ -97,6 +134,7 @@ class LangFuseLogger:
metadata = (
litellm_params.get("metadata", {}) or {}
) # if litellm_params['metadata'] == None
metadata = self.add_metadata_from_header(litellm_params, metadata)
optional_params = copy.deepcopy(kwargs.get("optional_params", {}))
prompt = {"messages": kwargs.get("messages")}
@@ -182,9 +220,11 @@ class LangFuseLogger:
verbose_logger.info(f"Langfuse Layer Logging - logging success")
return {"trace_id": trace_id, "generation_id": generation_id}
except:
traceback.print_exc()
verbose_logger.debug(f"Langfuse Layer Error - {traceback.format_exc()}")
except Exception as e:
verbose_logger.error(
"Langfuse Layer Error(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
return {"trace_id": None, "generation_id": None}
async def _async_log_event(

View file

@@ -44,7 +44,9 @@ class LangsmithLogger:
print_verbose(
f"Langsmith Logging - project_name: {project_name}, run_name {run_name}"
)
langsmith_base_url = os.getenv("LANGSMITH_BASE_URL", "https://api.smith.langchain.com")
langsmith_base_url = os.getenv(
"LANGSMITH_BASE_URL", "https://api.smith.langchain.com"
)
try:
print_verbose(
@@ -89,9 +91,7 @@ class LangsmithLogger:
}
url = f"{langsmith_base_url}/runs"
print_verbose(
f"Langsmith Logging - About to send data to {url} ..."
)
print_verbose(f"Langsmith Logging - About to send data to {url} ...")
response = requests.post(
url=url,
json=data,
@@ -106,6 +106,5 @@ class LangsmithLogger:
f"Langsmith Layer Logging - final response object: {response_obj}"
)
except:
# traceback.print_exc()
print_verbose(f"Langsmith Layer Error - {traceback.format_exc()}")
pass

View file

@@ -171,7 +171,6 @@ class LogfireLogger:
f"Logfire Layer Logging - final response object: {response_obj}"
)
except Exception as e:
traceback.print_exc()
verbose_logger.debug(
f"Logfire Layer Error - {str(e)}\n{traceback.format_exc()}"
)

View file

@@ -14,6 +14,7 @@ def parse_usage(usage):
"prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0,
}
def parse_tool_calls(tool_calls):
if tool_calls is None:
return None
@@ -26,13 +27,13 @@ def parse_tool_calls(tool_calls):
"function": {
"name": tool_call.function.name,
"arguments": tool_call.function.arguments,
}
},
}
return serialized
return [clean_tool_call(tool_call) for tool_call in tool_calls]
def parse_messages(input):
@@ -176,6 +177,5 @@ class LunaryLogger:
)
except:
# traceback.print_exc()
print_verbose(f"Lunary Logging Error - {traceback.format_exc()}")
pass

View file

@@ -109,8 +109,8 @@ class PrometheusLogger:
end_user_id, user_api_key, model, user_api_team, user_id
).inc()
except Exception as e:
traceback.print_exc()
verbose_logger.debug(
f"prometheus Layer Error - {str(e)}\n{traceback.format_exc()}"
verbose_logger.error(
"prometheus Layer Error(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
pass

View file

@@ -180,6 +180,5 @@ class S3Logger:
print_verbose(f"s3 Layer Logging - final response object: {response_obj}")
return response
except Exception as e:
traceback.print_exc()
verbose_logger.debug(f"s3 Layer Error - {str(e)}\n{traceback.format_exc()}")
pass

View file

@@ -110,6 +110,5 @@ class Supabase:
)
except:
# traceback.print_exc()
print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
pass

View file

@@ -217,6 +217,5 @@ class WeightsBiasesLogger:
f"W&B Logging Logging - final response object: {response_obj}"
)
except:
# traceback.print_exc()
print_verbose(f"W&B Logging Layer Error - {traceback.format_exc()}")
pass

View file

@@ -1,13 +1,14 @@
import os, types, traceback, copy, asyncio
import json
from enum import Enum
import types
import traceback
import copy
import time
from typing import Callable, Optional
from litellm.utils import ModelResponse, get_secret, Choices, Message, Usage
from litellm.utils import ModelResponse, Choices, Message, Usage
import litellm
import sys, httpx
import httpx
from .prompt_templates.factory import prompt_factory, custom_prompt, get_system_prompt
from packaging.version import Version
from litellm import verbose_logger
class GeminiError(Exception):
@@ -264,7 +265,8 @@ def completion(
choices_list.append(choice_obj)
model_response["choices"] = choices_list
except Exception as e:
traceback.print_exc()
verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e)))
verbose_logger.debug(traceback.format_exc())
raise GeminiError(
message=traceback.format_exc(), status_code=response.status_code
)
@@ -356,7 +358,8 @@ async def async_completion(
choices_list.append(choice_obj)
model_response["choices"] = choices_list
except Exception as e:
traceback.print_exc()
verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e)))
verbose_logger.debug(traceback.format_exc())
raise GeminiError(
message=traceback.format_exc(), status_code=response.status_code
)

View file

@@ -2,10 +2,12 @@ from itertools import chain
import requests, types, time # type: ignore
import json, uuid
import traceback
from typing import Optional
from typing import Optional, List
import litellm
from litellm.types.utils import ProviderField
import httpx, aiohttp, asyncio # type: ignore
from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm import verbose_logger
class OllamaError(Exception):
@@ -124,6 +126,19 @@ class OllamaConfig:
)
and v is not None
}
def get_required_params(self) -> List[ProviderField]:
"""For a given provider, return it's required fields with a description"""
return [
ProviderField(
field_name="base_url",
field_type="string",
field_description="Your Ollama API Base",
field_value="http://10.10.11.249:11434",
)
]
def get_supported_openai_params(
self,
):
@@ -138,10 +153,12 @@ class OllamaConfig:
"response_format",
]
# ollama wants plain base64 jpeg/png files as images. strip any leading dataURI
# and convert to jpeg if necessary.
def _convert_image(image):
import base64, io
try:
from PIL import Image
except:
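The hunk above is truncated mid-function, but the comment it adds describes the intent. A rough sketch of the dataURI-stripping step it refers to, assuming the usual `data:<mime>;base64,<payload>` shape (the PIL-based jpeg conversion is omitted here):

def strip_data_uri(image: str) -> str:
    # "data:image/png;base64,iVBORw0..." -> "iVBORw0..."
    if image.startswith("data:") and "," in image:
        return image.split(",", 1)[1]
    return image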
@@ -391,7 +408,13 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
async for transformed_chunk in streamwrapper:
yield transformed_chunk
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"LiteLLM.ollama.py::ollama_async_streaming(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e
@@ -455,7 +478,12 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
)
return model_response
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"LiteLLM.ollama.py::ollama_acompletion(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e

View file

@@ -1,11 +1,15 @@
from itertools import chain
import requests, types, time
import json, uuid
import requests
import types
import time
import json
import uuid
import traceback
from typing import Optional
from litellm import verbose_logger
import litellm
import httpx, aiohttp, asyncio
from .prompt_templates.factory import prompt_factory, custom_prompt
import httpx
import aiohttp
class OllamaError(Exception):
@@ -299,7 +303,10 @@ def get_ollama_response(
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
"function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function",
}
],
@@ -307,7 +314,9 @@ def get_ollama_response(
model_response["choices"][0]["message"] = message
model_response["choices"][0]["finish_reason"] = "tool_calls"
else:
model_response["choices"][0]["message"]["content"] = response_json["message"]["content"]
model_response["choices"][0]["message"]["content"] = response_json["message"][
"content"
]
model_response["created"] = int(time.time())
model_response["model"] = "ollama/" + model
prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=messages)) # type: ignore
@@ -361,7 +370,10 @@ def ollama_completion_stream(url, api_key, data, logging_obj):
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
"function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function",
}
],
@@ -410,9 +422,10 @@ async def ollama_async_streaming(
first_chunk_content = first_chunk.choices[0].delta.content or ""
response_content = first_chunk_content + "".join(
[
chunk.choices[0].delta.content
async for chunk in streamwrapper
if chunk.choices[0].delta.content]
chunk.choices[0].delta.content
async for chunk in streamwrapper
if chunk.choices[0].delta.content
]
)
function_call = json.loads(response_content)
delta = litellm.utils.Delta(
@@ -420,7 +433,10 @@ async def ollama_async_streaming(
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
"function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function",
}
],
@@ -433,7 +449,8 @@ async def ollama_async_streaming(
async for transformed_chunk in streamwrapper:
yield transformed_chunk
except Exception as e:
traceback.print_exc()
verbose_logger.error("LiteLLM.gemini(): Exception occured - {}".format(str(e)))
verbose_logger.debug(traceback.format_exc())
async def ollama_acompletion(
@@ -483,7 +500,10 @@ async def ollama_acompletion(
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
"function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function",
}
],
@@ -491,7 +511,9 @@ async def ollama_acompletion(
model_response["choices"][0]["message"] = message
model_response["choices"][0]["finish_reason"] = "tool_calls"
else:
model_response["choices"][0]["message"]["content"] = response_json["message"]["content"]
model_response["choices"][0]["message"]["content"] = response_json[
"message"
]["content"]
model_response["created"] = int(time.time())
model_response["model"] = "ollama_chat/" + data["model"]
@@ -509,5 +531,9 @@ async def ollama_acompletion(
)
return model_response
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"LiteLLM.ollama_acompletion(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
raise e

View file

@@ -1,11 +1,12 @@
import os, types, traceback, copy
import json
from enum import Enum
import types
import traceback
import copy
import time
from typing import Callable, Optional
from litellm.utils import ModelResponse, get_secret, Choices, Message, Usage
from litellm.utils import ModelResponse, Choices, Message, Usage
import litellm
import sys, httpx
import httpx
from litellm import verbose_logger
class PalmError(Exception):
@@ -165,7 +166,10 @@ def completion(
choices_list.append(choice_obj)
model_response["choices"] = choices_list
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.llms.palm.py::completion(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
raise PalmError(
message=traceback.format_exc(), status_code=response.status_code
)

View file

@@ -826,7 +826,7 @@ def anthropic_messages_pt_xml(messages: list):
) # either string or none
if messages[msg_i].get(
"tool_calls", []
): # support assistant tool invoke convertion
): # support assistant tool invoke conversion
assistant_text += convert_to_anthropic_tool_invoke_xml( # type: ignore
messages[msg_i]["tool_calls"]
)
@@ -1217,7 +1217,7 @@ def anthropic_messages_pt(messages: list):
if messages[msg_i].get(
"tool_calls", []
): # support assistant tool invoke convertion
): # support assistant tool invoke conversion
assistant_content.extend(
convert_to_anthropic_tool_invoke(messages[msg_i]["tool_calls"])
)

View file

@@ -297,24 +297,29 @@ def _convert_gemini_role(role: str) -> Literal["user", "model"]:
def _process_gemini_image(image_url: str) -> PartType:
try:
if "gs://" in image_url:
# Case 1: Images with Cloud Storage URIs
if ".mp4" in image_url and "gs://" in image_url:
# Case 1: Videos with Cloud Storage URIs
part_mime = "video/mp4"
_file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
return PartType(file_data=_file_data)
elif ".pdf" in image_url and "gs://" in image_url:
# Case 2: PDF's with Cloud Storage URIs
part_mime = "application/pdf"
_file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
return PartType(file_data=_file_data)
elif "gs://" in image_url:
# Case 3: Images with Cloud Storage URIs
# The supported MIME types for images include image/png and image/jpeg.
part_mime = "image/png" if "png" in image_url else "image/jpeg"
_file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
return PartType(file_data=_file_data)
elif "https:/" in image_url:
# Case 2: Images with direct links
# Case 4: Images with direct links
image = _load_image_from_url(image_url)
_blob = BlobType(data=image.data, mime_type=image._mime_type)
return PartType(inline_data=_blob)
elif ".mp4" in image_url and "gs://" in image_url:
# Case 3: Videos with Cloud Storage URIs
part_mime = "video/mp4"
_file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
return PartType(file_data=_file_data)
elif "base64" in image_url:
# Case 4: Images with base64 encoding
# Case 5: Images with base64 encoding
import base64, re
# base 64 is passed as data:image/jpeg;base64,<base-64-encoded-image>
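The reordering in `_process_gemini_image` matters because the old code matched the generic `gs://` image branch first, so a Cloud Storage video like `gs://bucket/clip.mp4` was mislabeled `image/jpeg`. A simplified sketch of the new resolution order for `gs://` URIs:

def resolve_gcs_mime(uri: str) -> str:
    if ".mp4" in uri:
        return "video/mp4"
    if ".pdf" in uri:
        return "application/pdf"
    # images remain the fallback for all other Cloud Storage URIs
    return "image/png" if "png" in uri else "image/jpeg"

assert resolve_gcs_mime("gs://bucket/clip.mp4") == "video/mp4"
assert resolve_gcs_mime("gs://bucket/scan.pdf") == "application/pdf"
assert resolve_gcs_mime("gs://bucket/photo.png") == "image/png"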
@@ -390,7 +395,7 @@ def _gemini_convert_messages_with_history(messages: list) -> List[ContentType]:
assistant_content.extend(_parts)
elif messages[msg_i].get(
"tool_calls", []
): # support assistant tool invoke convertion
): # support assistant tool invoke conversion
assistant_content.extend(
convert_to_gemini_tool_call_invoke(messages[msg_i]["tool_calls"])
)
@@ -642,9 +647,9 @@ def completion(
prompt = " ".join(
[
message["content"]
message.get("content")
for message in messages
if isinstance(message["content"], str)
if isinstance(message.get("content", None), str)
]
)

View file

@@ -365,7 +365,10 @@ async def acompletion(
) # sets the logging event loop if the user does sync streaming (e.g. on proxy for sagemaker calls)
return response
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.acompletion(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
custom_llm_provider = custom_llm_provider or "openai"
raise exception_type(
model=model,
@@ -478,7 +481,10 @@ def mock_completion(
except Exception as e:
if isinstance(e, openai.APIError):
raise e
traceback.print_exc()
verbose_logger.error(
"litellm.mock_completion(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
raise Exception("Mock completion response failed")
@@ -4449,7 +4455,10 @@ async def ahealth_check(
response = {} # args like remaining ratelimit etc.
return response
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.ahealth_check(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
stack_trace = traceback.format_exc()
if isinstance(stack_trace, str):
stack_trace = stack_trace[:1000]

View file

@@ -1,6 +1,7 @@
import json
import logging
from logging import Formatter
import sys
class JsonFormatter(Formatter):

View file

@@ -56,8 +56,10 @@ router_settings:
litellm_settings:
success_callback: ["langfuse"]
json_logs: true
general_settings:
alerting: ["email"]
key_management_system: "aws_kms"
key_management_settings:
hosted_keys: ["LITELLM_MASTER_KEY"]

View file

@@ -76,6 +76,17 @@ class LitellmUserRoles(str, enum.Enum):
return ui_labels.get(self.value, "")
class LitellmTableNames(str, enum.Enum):
"""
Enum for Table Names used by LiteLLM
"""
TEAM_TABLE_NAME: str = "LiteLLM_TeamTable"
USER_TABLE_NAME: str = "LiteLLM_UserTable"
KEY_TABLE_NAME: str = "LiteLLM_VerificationToken"
PROXY_MODEL_TABLE_NAME: str = "LiteLLM_ModelTable"
AlertType = Literal[
"llm_exceptions",
"llm_too_slow",
@@ -935,6 +946,7 @@ class KeyManagementSystem(enum.Enum):
AZURE_KEY_VAULT = "azure_key_vault"
AWS_SECRET_MANAGER = "aws_secret_manager"
LOCAL = "local"
AWS_KMS = "aws_kms"
class KeyManagementSettings(LiteLLMBase):
@@ -1276,6 +1288,22 @@ class LiteLLM_ErrorLogs(LiteLLMBase):
endTime: Union[str, datetime, None]
class LiteLLM_AuditLogs(LiteLLMBase):
id: str
updated_at: datetime
changed_by: str
action: Literal["created", "updated", "deleted"]
table_name: Literal[
LitellmTableNames.TEAM_TABLE_NAME,
LitellmTableNames.USER_TABLE_NAME,
LitellmTableNames.KEY_TABLE_NAME,
LitellmTableNames.PROXY_MODEL_TABLE_NAME,
]
object_id: str
before_value: Optional[Json] = None
updated_values: Optional[Json] = None
class LiteLLM_SpendLogs_ResponseObject(LiteLLMBase):
response: Optional[List[Union[LiteLLM_SpendLogs, Any]]] = None

View file

@@ -88,7 +88,7 @@ class _PROXY_AzureContentSafety(
verbose_proxy_logger.debug(
"Error in Azure Content-Safety: %s", traceback.format_exc()
)
traceback.print_exc()
verbose_proxy_logger.debug(traceback.format_exc())
raise
result = self._compute_result(response)
@@ -123,7 +123,12 @@ class _PROXY_AzureContentSafety(
except HTTPException as e:
raise e
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.hooks.azure_content_safety.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
async def async_post_call_success_hook(
self,

View file

@@ -94,7 +94,12 @@ class _PROXY_BatchRedisRequests(CustomLogger):
except HTTPException as e:
raise e
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.hooks.batch_redis_get.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
async def async_get_cache(self, *args, **kwargs):
"""

View file

@@ -1,13 +1,13 @@
# What this does?
## Checks if key is allowed to use the cache controls passed in to the completion() call
from typing import Optional
import litellm
from litellm import verbose_logger
from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
import json, traceback
import traceback
class _PROXY_CacheControlCheck(CustomLogger):
@@ -54,4 +54,9 @@ class _PROXY_CacheControlCheck(CustomLogger):
except HTTPException as e:
raise e
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.proxy.hooks.cache_control_check.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())

View file

@@ -1,10 +1,10 @@
from typing import Optional
from litellm import verbose_logger
import litellm
from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
import json, traceback
import traceback
class _PROXY_MaxBudgetLimiter(CustomLogger):
@@ -44,4 +44,9 @@ class _PROXY_MaxBudgetLimiter(CustomLogger):
except HTTPException as e:
raise e
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.proxy.hooks.max_budget_limiter.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())

View file

@@ -8,8 +8,8 @@
# Tell us how we can improve! - Krrish & Ishaan
from typing import Optional, Literal, Union
import litellm, traceback, sys, uuid, json
from typing import Optional, Union
import litellm, traceback, uuid, json # noqa: E401
from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
@@ -21,8 +21,8 @@ from litellm.utils import (
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio
import aiohttp
import asyncio
class _OPTIONAL_PresidioPIIMasking(CustomLogger):
@@ -138,7 +138,12 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
else:
raise Exception(f"Invalid anonymizer response: {redacted_text}")
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.hooks.presidio_pii_masking.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e
async def async_pre_call_hook(

View file

@@ -204,7 +204,12 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger):
return e.detail["error"]
raise e
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
async def async_moderation_hook(
self,

View file

@@ -23,4 +23,5 @@ general_settings:
master_key: sk-1234
litellm_settings:
callbacks: ["otel"]
callbacks: ["otel"]
store_audit_logs: true
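Per the checks added later in this commit, the `store_audit_logs` config line maps to a module-level flag; the programmatic equivalent is:

import litellm

litellm.store_audit_logs = True  # gates every create_audit_log_for_update() call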

View file

@@ -103,6 +103,7 @@ from litellm.proxy.utils import (
update_spend,
encrypt_value,
decrypt_value,
get_error_message_str,
)
from litellm import (
CreateBatchRequest,
@@ -112,7 +113,10 @@ from litellm import (
CreateFileRequest,
)
from litellm.proxy.secret_managers.google_kms import load_google_kms
from litellm.proxy.secret_managers.aws_secret_manager import load_aws_secret_manager
from litellm.proxy.secret_managers.aws_secret_manager import (
load_aws_secret_manager,
load_aws_kms,
)
import pydantic
from litellm.proxy._types import *
from litellm.caching import DualCache, RedisCache
@@ -125,7 +129,10 @@ from litellm.router import (
AssistantsTypedDict,
)
from litellm.router import ModelInfo as RouterModelInfo
from litellm._logging import verbose_router_logger, verbose_proxy_logger
from litellm._logging import (
verbose_router_logger,
verbose_proxy_logger,
)
from litellm.proxy.auth.handle_jwt import JWTHandler
from litellm.proxy.auth.litellm_license import LicenseCheck
from litellm.proxy.auth.model_checks import (
@@ -1471,7 +1478,12 @@ async def user_api_key_auth(
else:
raise Exception()
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_api_key_auth(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, litellm.BudgetExceededError):
raise ProxyException(
message=e.message, type="auth_error", param=None, code=400
@@ -2736,10 +2748,12 @@ class ProxyConfig:
load_google_kms(use_google_kms=True)
elif (
key_management_system
== KeyManagementSystem.AWS_SECRET_MANAGER.value
== KeyManagementSystem.AWS_SECRET_MANAGER.value # noqa: F405
):
### LOAD FROM AWS SECRET MANAGER ###
load_aws_secret_manager(use_aws_secret_manager=True)
elif key_management_system == KeyManagementSystem.AWS_KMS.value:
load_aws_kms(use_aws_kms=True)
else:
raise ValueError("Invalid Key Management System selected")
key_management_settings = general_settings.get(
@@ -2773,6 +2787,7 @@ class ProxyConfig:
master_key = general_settings.get(
"master_key", litellm.get_secret("LITELLM_MASTER_KEY", None)
)
if master_key and master_key.startswith("os.environ/"):
master_key = litellm.get_secret(master_key)
if not isinstance(master_key, str):
@@ -3476,7 +3491,12 @@ async def generate_key_helper_fn(
)
key_data["token_id"] = getattr(create_key_response, "token", None)
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.generate_key_helper_fn(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise e
raise HTTPException(
@@ -3515,7 +3535,12 @@ async def delete_verification_token(tokens: List, user_id: Optional[str] = None)
else:
raise Exception("DB not connected. prisma_client is None")
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.delete_verification_token(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e
return deleted_tokens
@@ -3676,7 +3701,12 @@ async def async_assistants_data_generator(
done_message = "[DONE]"
yield f"data: {done_message}\n\n"
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.async_assistants_data_generator(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict,
original_exception=e,
@@ -3686,9 +3716,6 @@ async def async_assistants_data_generator(
f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
)
router_model_names = llm_router.model_names if llm_router is not None else []
if user_debug:
traceback.print_exc()
if isinstance(e, HTTPException):
raise e
else:
@@ -3728,7 +3755,12 @@ async def async_data_generator(
done_message = "[DONE]"
yield f"data: {done_message}\n\n"
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.async_data_generator(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict,
original_exception=e,
@@ -3738,8 +3770,6 @@ async def async_data_generator(
f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
)
router_model_names = llm_router.model_names if llm_router is not None else []
if user_debug:
traceback.print_exc()
if isinstance(e, HTTPException):
raise e
@@ -3800,6 +3830,18 @@ def on_backoff(details):
verbose_proxy_logger.debug("Backing off... this was attempt # %s", details["tries"])
def giveup(e):
result = not (
isinstance(e, ProxyException)
and getattr(e, "message", None) is not None
and isinstance(e.message, str)
and "Max parallel request limit reached" in e.message
)
if result:
verbose_proxy_logger.info(json.dumps({"event": "giveup", "exception": str(e)}))
return result
@router.on_event("startup")
async def startup_event():
global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time, litellm_proxy_admin_name, db_writer_client, store_model_in_db
@@ -4084,12 +4126,8 @@ def model_list(
max_tries=litellm.num_retries or 3, # maximum number of retries
max_time=litellm.request_timeout or 60, # maximum total time to retry for
on_backoff=on_backoff, # specifying the function to call on backoff
giveup=lambda e: not (
isinstance(e, ProxyException)
and getattr(e, "message", None) is not None
and isinstance(e.message, str)
and "Max parallel request limit reached" in e.message
), # the result of the logical expression is on the second position
giveup=giveup,
logger=verbose_proxy_logger,
)
async def chat_completion(
request: Request,
@@ -4098,6 +4136,7 @@ async def chat_completion(
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
global general_settings, user_debug, proxy_logging_obj, llm_model_list
data = {}
try:
body = await request.body()
@@ -4386,7 +4425,12 @@ async def chat_completion(
return _chat_response
except Exception as e:
data["litellm_status"] = "fail" # used for alerting
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.chat_completion(): Exception occured - {}".format(
get_error_message_str(e=e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
@@ -4397,8 +4441,6 @@ async def chat_completion(
litellm_debug_info,
)
router_model_names = llm_router.model_names if llm_router is not None else []
if user_debug:
traceback.print_exc()
if isinstance(e, HTTPException):
raise ProxyException(
@@ -4630,15 +4672,12 @@ async def completion(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
verbose_proxy_logger.debug("EXCEPTION RAISED IN PROXY MAIN.PY")
litellm_debug_info = getattr(e, "litellm_debug_info", "")
verbose_proxy_logger.debug(
"\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
e,
litellm_debug_info,
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.completion(): Exception occured - {}".format(
str(e)
)
)
traceback.print_exc()
error_traceback = traceback.format_exc()
verbose_proxy_logger.debug(traceback.format_exc())
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -4848,7 +4887,12 @@ async def embeddings(
e,
litellm_debug_info,
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.embeddings(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e)),
@@ -5027,7 +5071,12 @@ async def image_generation(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.image_generation(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e)),
@@ -5205,7 +5254,12 @@ async def audio_speech(
)
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.audio_speech(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e
@@ -5394,7 +5448,12 @@ async def audio_transcriptions(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.audio_transcription(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -5403,7 +5462,6 @@ async def audio_transcriptions(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -5531,7 +5589,12 @@ async def get_assistants(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.get_assistants(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -5540,7 +5603,6 @@ async def get_assistants(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -5660,7 +5722,12 @@ async def create_threads(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.create_threads(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -5669,7 +5736,6 @@ async def create_threads(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -5788,7 +5854,12 @@ async def get_thread(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.get_thread(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -5797,7 +5868,6 @@ async def get_thread(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -5919,7 +5989,12 @@ async def add_messages(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.add_messages(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -5928,7 +6003,6 @@ async def add_messages(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -6046,7 +6120,12 @@ async def get_messages(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.get_messages(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -6055,7 +6134,6 @@ async def get_messages(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -6187,7 +6265,12 @@ async def run_thread(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.run_thread(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -6196,7 +6279,6 @@ async def run_thread(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -6335,7 +6417,12 @@ async def create_batch(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.create_batch(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -6344,7 +6431,6 @@ async def create_batch(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -6478,7 +6564,12 @@ async def retrieve_batch(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.retrieve_batch(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -6631,7 +6722,12 @@ async def create_file(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.create_file(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -6640,7 +6736,6 @@ async def create_file(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -6816,7 +6911,12 @@ async def moderations(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.moderations(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e)),
@@ -6825,7 +6925,6 @@ async def moderations(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -7115,9 +7214,33 @@ async def generate_key_fn(
)
)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = json.dumps(response)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id
or litellm_proxy_admin_name,
table_name=LitellmTableNames.KEY_TABLE_NAME,
object_id=response.get("token_id", ""),
action="created",
updated_values=_updated_values,
before_value=None,
)
)
)
return GenerateKeyResponse(**response)
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.generate_key_fn(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -7138,7 +7261,11 @@ async def generate_key_fn(
@router.post(
"/key/update", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def update_key_fn(request: Request, data: UpdateKeyRequest):
async def update_key_fn(
request: Request,
data: UpdateKeyRequest,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
Update an existing key
"""
@@ -7150,6 +7277,16 @@ async def update_key_fn(request: Request, data: UpdateKeyRequest):
if prisma_client is None:
raise Exception("Not connected to DB!")
existing_key_row = await prisma_client.get_data(
token=data.key, table_name="key", query_type="find_unique"
)
if existing_key_row is None:
raise HTTPException(
status_code=404,
detail={"error": f"Team not found, passed team_id={data.team_id}"},
)
# get non default values for key
non_default_values = {}
for k, v in data_json.items():
@@ -7176,6 +7313,29 @@ async def update_key_fn(request: Request, data: UpdateKeyRequest):
hashed_token = hash_token(key)
user_api_key_cache.delete_cache(hashed_token)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = json.dumps(data_json)
_before_value = existing_key_row.json(exclude_none=True)
_before_value = json.dumps(_before_value)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id
or litellm_proxy_admin_name,
table_name=LitellmTableNames.KEY_TABLE_NAME,
object_id=data.key,
action="updated",
updated_values=_updated_values,
before_value=_before_value,
)
)
)
return {"key": key, **response["data"]}
# update based on remaining passed in values
except Exception as e:
@@ -7238,6 +7398,34 @@ async def delete_key_fn(
):
user_id = None # unless they're admin
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
# we do this after the first for loop, since first for loop is for validation. we only want this inserted after validation passes
if litellm.store_audit_logs is True:
# make an audit log for each key deleted
for key in data.keys:
key_row = await prisma_client.get_data( # type: ignore
token=key, table_name="key", query_type="find_unique"
)
key_row = key_row.json(exclude_none=True)
_key_row = json.dumps(key_row)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id
or litellm_proxy_admin_name,
table_name=LitellmTableNames.KEY_TABLE_NAME,
object_id=key,
action="deleted",
updated_values="{}",
before_value=_key_row,
)
)
)
number_deleted_keys = await delete_verification_token(
tokens=keys, user_id=user_id
)
@@ -9507,7 +9695,12 @@ async def user_info(
}
return response_data
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_info(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -9602,7 +9795,12 @@ async def user_update(data: UpdateUserRequest):
return response
# update based on remaining passed in values
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_update(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -9655,7 +9853,12 @@ async def user_request_model(request: Request):
return {"status": "success"}
# update based on remaining passed in values
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_request_model(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -9697,7 +9900,12 @@ async def user_get_requests():
return {"requests": response}
# update based on remaining passed in values
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_get_requests(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -10087,7 +10295,12 @@ async def update_end_user(
# update based on remaining passed in values
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.update_end_user(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Internal Server Error({str(e)})"),
@@ -10171,7 +10384,12 @@ async def delete_end_user(
# update based on remaining passed in values
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.delete_end_user(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Internal Server Error({str(e)})"),
@@ -10365,12 +10583,65 @@ async def new_team(
}
},
)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = complete_team_data.json(exclude_none=True)
_updated_values = json.dumps(_updated_values)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id or litellm_proxy_admin_name,
table_name=LitellmTableNames.TEAM_TABLE_NAME,
object_id=data.team_id,
action="created",
updated_values=_updated_values,
before_value=None,
)
)
)
try:
return team_row.model_dump()
except Exception as e:
return team_row.dict()
async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
if premium_user is not True:
return
if litellm.store_audit_logs is not True:
return
if prisma_client is None:
raise Exception("prisma_client is None, no DB connected")
verbose_proxy_logger.debug("creating audit log for %s", request_data)
if isinstance(request_data.updated_values, dict):
request_data.updated_values = json.dumps(request_data.updated_values)
if isinstance(request_data.before_value, dict):
request_data.before_value = json.dumps(request_data.before_value)
_request_data = request_data.dict(exclude_none=True)
try:
await prisma_client.db.litellm_auditlog.create(
data={
**_request_data, # type: ignore
}
)
except Exception as e:
# [Non-Blocking Exception. Do not allow blocking LLM API call]
verbose_proxy_logger.error(f"Failed Creating audit log {e}")
return
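A hedged call-site sketch mirroring the key/team endpoints above (it assumes the `LiteLLM_AuditLogs` and `LitellmTableNames` types imported earlier in this file; all values are placeholders). Audit writes are scheduled with `asyncio.create_task` so a slow or failing DB insert never blocks the API response:

import asyncio
import json
import uuid
from datetime import datetime, timezone

async def example_audit_write():
    await create_audit_log_for_update(
        request_data=LiteLLM_AuditLogs(
            id=str(uuid.uuid4()),
            updated_at=datetime.now(timezone.utc),
            changed_by="admin",  # placeholder user id
            table_name=LitellmTableNames.KEY_TABLE_NAME,
            object_id="hashed-token-id",  # placeholder
            action="updated",
            updated_values=json.dumps({"max_budget": 100.0}),
            before_value=json.dumps({"max_budget": 50.0}),
        )
    )

# in request handlers this is scheduled, not awaited:
# asyncio.create_task(example_audit_write())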
@router.post(
"/team/update", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@@ -10443,6 +10714,27 @@ async def update_team(
team_id=data.team_id,
)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_before_value = existing_team_row.json(exclude_none=True)
_before_value = json.dumps(_before_value)
_after_value: str = json.dumps(updated_kv)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id or litellm_proxy_admin_name,
table_name=LitellmTableNames.TEAM_TABLE_NAME,
object_id=data.team_id,
action="updated",
updated_values=_after_value,
before_value=_before_value,
)
)
)
return team_row
@@ -10714,6 +11006,35 @@ async def delete_team(
detail={"error": f"Team not found, passed team_id={team_id}"},
)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
# we do this after the first for loop, since first for loop is for validation. we only want this inserted after validation passes
if litellm.store_audit_logs is True:
# make an audit log for each team deleted
for team_id in data.team_ids:
team_row = await prisma_client.get_data( # type: ignore
team_id=team_id, table_name="team", query_type="find_unique"
)
_team_row = team_row.json(exclude_none=True)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id
or litellm_proxy_admin_name,
table_name=LitellmTableNames.TEAM_TABLE_NAME,
object_id=team_id,
action="deleted",
updated_values="{}",
before_value=_team_row,
)
)
)
# End of Audit logging
## DELETE ASSOCIATED KEYS
await prisma_client.delete_data(team_id_list=data.team_ids, table_name="key")
## DELETE TEAMS
@@ -11371,7 +11692,12 @@ async def add_new_model(
return model_response
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.add_new_model(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -11485,7 +11811,12 @@ async def update_model(
return model_response
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.update_model(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -13719,7 +14050,12 @@ async def update_config(config_info: ConfigYAML):
return {"message": "Config updated successfully"}
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.update_config(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -14192,7 +14528,12 @@ async def get_config():
"available_callbacks": all_available_callbacks,
}
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.get_config(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -14443,7 +14784,12 @@ async def health_services_endpoint(
}
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.health_services_endpoint(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -14522,7 +14868,12 @@ async def health_endpoint(
"unhealthy_count": len(unhealthy_endpoints),
}
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.py::health_endpoint(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e

View file

@@ -243,4 +243,16 @@ model LiteLLM_InvitationLink {
liteLLM_user_table_user LiteLLM_UserTable @relation("UserId", fields: [user_id], references: [user_id])
liteLLM_user_table_created LiteLLM_UserTable @relation("CreatedBy", fields: [created_by], references: [user_id])
liteLLM_user_table_updated LiteLLM_UserTable @relation("UpdatedBy", fields: [updated_by], references: [user_id])
}
model LiteLLM_AuditLog {
id String @id @default(uuid())
updated_at DateTime @default(now())
changed_by String // user or system that performed the action
action String // create, update, delete
table_name String // one of LitellmTableNames.TEAM_TABLE_NAME, LitellmTableNames.USER_TABLE_NAME, LitellmTableNames.KEY_TABLE_NAME, LitellmTableNames.PROXY_MODEL_TABLE_NAME,
object_id String // id of the object being audited. This can be the key id, team id, user id, model id
before_value Json? // value of the row before change
updated_values Json? // value of the row after change
}

View file

@@ -8,7 +8,8 @@ Requires:
* `pip install boto3>=1.28.57`
"""
import litellm, os
import litellm
import os
from typing import Optional
from litellm.proxy._types import KeyManagementSystem
@@ -38,3 +39,21 @@ def load_aws_secret_manager(use_aws_secret_manager: Optional[bool]):
except Exception as e:
raise e
def load_aws_kms(use_aws_kms: Optional[bool]):
if use_aws_kms is None or use_aws_kms is False:
return
try:
import boto3
validate_environment()
# Create an AWS KMS client
kms_client = boto3.client("kms", region_name=os.getenv("AWS_REGION_NAME"))
litellm.secret_manager_client = kms_client
litellm._key_management_system = KeyManagementSystem.AWS_KMS
except Exception as e:
raise e
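A hedged setup sketch for the new KMS path: with the region set, `load_aws_kms` swaps the global secret-manager client for a boto3 KMS client. The region value is a placeholder, and this requires `boto3` plus valid AWS credentials in the environment:

import os
import litellm

os.environ["AWS_REGION_NAME"] = "us-west-2"  # placeholder region
load_aws_kms(use_aws_kms=True)

assert litellm._key_management_system == KeyManagementSystem.AWS_KMS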

View file

@@ -2709,13 +2709,15 @@ def decrypt_value(value: bytes, master_key: str) -> str:
# LiteLLM Admin UI - Non SSO Login
html_form = """
url_to_redirect_to = os.getenv("PROXY_BASE_URL", "")
url_to_redirect_to += "/login"
html_form = f"""
<!DOCTYPE html>
<html>
<head>
<title>LiteLLM Login</title>
<style>
body {
body {{
font-family: Arial, sans-serif;
background-color: #f4f4f4;
margin: 0;
@@ -2724,42 +2726,42 @@ html_form = """
justify-content: center;
align-items: center;
height: 100vh;
}
}}
form {
form {{
background-color: #fff;
padding: 20px;
border-radius: 8px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}
}}
label {
label {{
display: block;
margin-bottom: 8px;
}
}}
input {
input {{
width: 100%;
padding: 8px;
margin-bottom: 16px;
box-sizing: border-box;
border: 1px solid #ccc;
border-radius: 4px;
}
}}
input[type="submit"] {
input[type="submit"] {{
background-color: #4caf50;
color: #fff;
cursor: pointer;
}
}}
input[type="submit"]:hover {
input[type="submit"]:hover {{
background-color: #45a049;
}
}}
</style>
</head>
<body>
<form action="/login" method="post">
<form action="{url_to_redirect_to}" method="post">
<h2>LiteLLM Login</h2>
<p>By default Username is "admin" and Password is your set LiteLLM Proxy `MASTER_KEY`</p>
@@ -2771,8 +2773,6 @@ html_form = """
<input type="password" id="password" name="password" required>
<input type="submit" value="Submit">
</form>
</body>
</html>
"""
@@ -2837,3 +2837,17 @@ missing_keys_html_form = """
</body>
</html>
"""
def get_error_message_str(e: Exception) -> str:
error_message = ""
if isinstance(e, HTTPException):
if isinstance(e.detail, str):
error_message = e.detail
elif isinstance(e.detail, dict):
error_message = json.dumps(e.detail)
else:
error_message = str(e)
else:
error_message = str(e)
return error_message
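A quick check of the helper above: `HTTPException` detail dicts are JSON-encoded rather than passed through `str()`, so structured error bodies survive intact in the new `verbose_proxy_logger.error` lines:

from fastapi import HTTPException

exc = HTTPException(status_code=400, detail={"error": "Invalid model"})
assert get_error_message_str(exc) == '{"error": "Invalid model"}'
assert get_error_message_str(ValueError("bad input")) == "bad input"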

View file

@@ -220,8 +220,6 @@ class Router:
[]
) # names of models under litellm_params. ex. azure/chatgpt-v-2
self.deployment_latency_map = {}
### SCHEDULER ###
self.scheduler = Scheduler(polling_interval=polling_interval)
### CACHING ###
cache_type: Literal["local", "redis"] = "local" # default to an in-memory cache
redis_cache = None
@@ -259,6 +257,10 @@ class Router:
redis_cache=redis_cache, in_memory_cache=InMemoryCache()
) # use a dual cache (Redis+In-Memory) for tracking cooldowns, usage, etc.
### SCHEDULER ###
self.scheduler = Scheduler(
polling_interval=polling_interval, redis_cache=redis_cache
)
self.default_deployment = None # use this to track the users default deployment, when they want to use model = *
self.default_max_parallel_requests = default_max_parallel_requests
@@ -2096,8 +2098,8 @@ class Router:
except Exception as e:
raise e
except Exception as e:
verbose_router_logger.debug(f"An exception occurred - {str(e)}")
traceback.print_exc()
verbose_router_logger.error(f"An exception occurred - {str(e)}")
verbose_router_logger.debug(traceback.format_exc())
raise original_exception
async def async_function_with_retries(self, *args, **kwargs):
@@ -4048,6 +4050,12 @@ class Router:
for idx in reversed(invalid_model_indices):
_returned_deployments.pop(idx)
## ORDER FILTERING ## -> if user set 'order' in deployments, return deployments with lowest order (e.g. order=1 > order=2)
if len(_returned_deployments) > 0:
_returned_deployments = litellm.utils._get_order_filtered_deployments(
_returned_deployments
)
return _returned_deployments
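A sketch of the order-filter semantics assumed by the new block: when deployments set `order` in their litellm_params, only the lowest-order deployments survive routing. Exact tie-breaking and the handling of untagged deployments live in `litellm.utils._get_order_filtered_deployments`; this standalone approximation treats a missing `order` as lowest priority:

deployments = [
    {"model_name": "gpt-4", "litellm_params": {"order": 2, "model": "azure/gpt-4-eu"}},
    {"model_name": "gpt-4", "litellm_params": {"order": 1, "model": "azure/gpt-4-us"}},
]
lowest = min(d["litellm_params"].get("order", float("inf")) for d in deployments)
filtered = [d for d in deployments if d["litellm_params"].get("order") == lowest]
assert filtered[0]["litellm_params"]["model"] == "azure/gpt-4-us"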
def _common_checks_available_deployment(

View file

@@ -1,11 +1,9 @@
#### What this does ####
# picks based on response time (for streaming, this is time to first token)
from pydantic import BaseModel, Extra, Field, root_validator
import os, requests, random # type: ignore
from pydantic import BaseModel
from typing import Optional, Union, List, Dict
from datetime import datetime, timedelta
import random
from litellm import verbose_logger
import traceback
from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
@@ -119,7 +117,12 @@ class LowestCostLoggingHandler(CustomLogger):
if self.test_flag:
self.logged_success += 1
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@@ -201,7 +204,12 @@ class LowestCostLoggingHandler(CustomLogger):
if self.test_flag:
self.logged_success += 1
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass
async def async_get_available_deployments(

View file

@@ -1,16 +1,16 @@
#### What this does ####
# picks based on response time (for streaming, this is time to first token)
from pydantic import BaseModel, Extra, Field, root_validator # type: ignore
import dotenv, os, requests, random # type: ignore
from pydantic import BaseModel
import random
from typing import Optional, Union, List, Dict
from datetime import datetime, timedelta
import random
import traceback
from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm import ModelResponse
from litellm import token_counter
import litellm
from litellm import verbose_logger
class LiteLLMBase(BaseModel):
@@ -165,7 +165,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
if self.test_flag:
self.logged_success += 1
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
@@ -229,7 +234,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
# do nothing if it's not a timeout error
return
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@ -352,7 +362,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
if self.test_flag:
self.logged_success += 1
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.router_strategy.lowest_latency.py::async_log_success_event(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass
def get_available_deployments(

View file

@ -11,6 +11,7 @@ from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_router_logger
from litellm.utils import print_verbose
class LiteLLMBase(BaseModel):
"""
Implements default functions, all pydantic objects should have.
@ -23,16 +24,20 @@ class LiteLLMBase(BaseModel):
# if using pydantic v1
return self.dict()
class RoutingArgs(LiteLLMBase):
ttl: int = 1 * 60 # 1min (RPM/TPM expire key)
ttl: int = 1 * 60 # 1min (RPM/TPM expire key)
class LowestTPMLoggingHandler(CustomLogger):
test_flag: bool = False
logged_success: int = 0
logged_failure: int = 0
default_cache_time_seconds: int = 1 * 60 * 60 # 1 hour
def __init__(self, router_cache: DualCache, model_list: list, routing_args: dict = {}):
def __init__(
self, router_cache: DualCache, model_list: list, routing_args: dict = {}
):
self.router_cache = router_cache
self.model_list = model_list
self.routing_args = RoutingArgs(**routing_args)
@ -72,19 +77,28 @@ class LowestTPMLoggingHandler(CustomLogger):
request_count_dict = self.router_cache.get_cache(key=tpm_key) or {}
request_count_dict[id] = request_count_dict.get(id, 0) + total_tokens
self.router_cache.set_cache(key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl)
self.router_cache.set_cache(
key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl
)
## RPM
request_count_dict = self.router_cache.get_cache(key=rpm_key) or {}
request_count_dict[id] = request_count_dict.get(id, 0) + 1
self.router_cache.set_cache(key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl)
self.router_cache.set_cache(
key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl
)
### TESTING ###
if self.test_flag:
self.logged_success += 1
except Exception as e:
traceback.print_exc()
verbose_router_logger.error(
"litellm.router_strategy.lowest_tpm_rpm.py::async_log_success_event(): Exception occured - {}".format(
str(e)
)
)
verbose_router_logger.debug(traceback.format_exc())
pass
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@ -123,19 +137,28 @@ class LowestTPMLoggingHandler(CustomLogger):
request_count_dict = self.router_cache.get_cache(key=tpm_key) or {}
request_count_dict[id] = request_count_dict.get(id, 0) + total_tokens
self.router_cache.set_cache(key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl)
self.router_cache.set_cache(
key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl
)
## RPM
request_count_dict = self.router_cache.get_cache(key=rpm_key) or {}
request_count_dict[id] = request_count_dict.get(id, 0) + 1
self.router_cache.set_cache(key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl)
self.router_cache.set_cache(
key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl
)
### TESTING ###
if self.test_flag:
self.logged_success += 1
except Exception as e:
traceback.print_exc()
verbose_router_logger.error(
"litellm.router_strategy.lowest_tpm_rpm.py::async_log_success_event(): Exception occured - {}".format(
str(e)
)
)
verbose_router_logger.debug(traceback.format_exc())
pass
def get_available_deployments(
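Both success handlers above follow the same pattern: per-minute usage dicts written back with a short TTL, so counters expire on their own instead of needing cleanup. A rough standalone sketch of that rolling-window bookkeeping, using a plain dict in place of `DualCache` (illustrative only):

```python
# Rolling-window usage tracking: one dict per (model group, minute), written
# back with a TTL so stale counters expire instead of needing cleanup.
import time

TTL = 60  # seconds; mirrors RoutingArgs.ttl

_cache: dict = {}  # key -> (expires_at, value); stand-in for DualCache


def set_cache(key: str, value, ttl: int = TTL) -> None:
    _cache[key] = (time.time() + ttl, value)


def get_cache(key: str):
    entry = _cache.get(key)
    if entry is None or entry[0] < time.time():
        return None
    return entry[1]


def log_usage(model_group: str, deployment_id: str, total_tokens: int) -> None:
    minute = time.strftime("%H-%M")
    tpm_key = f"{model_group}:tpm:{minute}"
    rpm_key = f"{model_group}:rpm:{minute}"
    for key, delta in ((tpm_key, total_tokens), (rpm_key, 1)):
        counts = get_cache(key) or {}
        counts[deployment_id] = counts.get(deployment_id, 0) + delta
        set_cache(key, counts, ttl=TTL)


log_usage("gpt-3.5-turbo", "deployment-1", total_tokens=42)
```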

View file

@ -1,19 +1,19 @@
#### What this does ####
# identifies lowest tpm deployment
from pydantic import BaseModel
import dotenv, os, requests, random
import random
from typing import Optional, Union, List, Dict
import datetime as datetime_og
from datetime import datetime
import traceback, asyncio, httpx
import traceback
import httpx
import litellm
from litellm import token_counter
from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_router_logger
from litellm._logging import verbose_router_logger, verbose_logger
from litellm.utils import print_verbose, get_utc_datetime
from litellm.types.router import RouterErrors
class LiteLLMBase(BaseModel):
"""
Implements default functions, all pydantic objects should have.
@ -22,12 +22,14 @@ class LiteLLMBase(BaseModel):
def json(self, **kwargs):
try:
return self.model_dump() # noqa
except:
except Exception as e:
# if using pydantic v1
return self.dict()
class RoutingArgs(LiteLLMBase):
ttl: int = 1 * 60 # 1min (RPM/TPM expire key)
ttl: int = 1 * 60 # 1min (RPM/TPM expire key)
class LowestTPMLoggingHandler_v2(CustomLogger):
"""
@ -47,7 +49,9 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
logged_failure: int = 0
default_cache_time_seconds: int = 1 * 60 * 60 # 1 hour
def __init__(self, router_cache: DualCache, model_list: list, routing_args: dict = {}):
def __init__(
self, router_cache: DualCache, model_list: list, routing_args: dict = {}
):
self.router_cache = router_cache
self.model_list = model_list
self.routing_args = RoutingArgs(**routing_args)
@ -104,7 +108,9 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
)
else:
# if local result below limit, check redis ## prevent unnecessary redis checks
result = self.router_cache.increment_cache(key=rpm_key, value=1, ttl=self.routing_args.ttl)
result = self.router_cache.increment_cache(
key=rpm_key, value=1, ttl=self.routing_args.ttl
)
if result is not None and result > deployment_rpm:
raise litellm.RateLimitError(
message="Deployment over defined rpm limit={}. current usage={}".format(
@ -244,12 +250,19 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
# update cache
## TPM
self.router_cache.increment_cache(key=tpm_key, value=total_tokens, ttl=self.routing_args.ttl)
self.router_cache.increment_cache(
key=tpm_key, value=total_tokens, ttl=self.routing_args.ttl
)
### TESTING ###
if self.test_flag:
self.logged_success += 1
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@ -295,7 +308,12 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
if self.test_flag:
self.logged_success += 1
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass
def _common_checks_available_deployment(
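The v2 handler's RPM guard works by increment-then-compare: bump the shared per-minute counter first, then reject the call if the returned total exceeds the deployment's limit. This stays race-free as long as the backend increment is atomic (e.g. Redis `INCR`). A hedged sketch of the pattern with illustrative names, not the actual litellm API:

```python
# Increment-then-compare rate limiting: bump the shared counter first, then
# reject if the new total is over the limit. With an atomic backend increment
# two concurrent callers can never both sneak under the limit.
import time

_counters: dict = {}  # stand-in for a shared cache


def increment_cache(key: str, value: int = 1) -> int:
    _counters[key] = _counters.get(key, 0) + value
    return _counters[key]


def pre_call_rpm_check(deployment_id: str, deployment_rpm: int) -> None:
    rpm_key = f"{deployment_id}:rpm:{time.strftime('%H-%M')}"
    result = increment_cache(rpm_key, 1)
    if result > deployment_rpm:
        # the real handler raises litellm.RateLimitError here
        raise RuntimeError(
            f"Deployment over defined rpm limit={deployment_rpm}. current usage={result}"
        )


pre_call_rpm_check("deployment-1", deployment_rpm=100)
```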

View file

@ -1,13 +1,14 @@
import heapq, time
import heapq
from pydantic import BaseModel
from typing import Optional
import enum
from litellm.caching import DualCache
from litellm.caching import DualCache, RedisCache
from litellm import print_verbose
class SchedulerCacheKeys(enum.Enum):
queue = "scheduler:queue"
default_in_memory_ttl = 5 # cache queue in-memory for 5s when redis cache available
class DefaultPriorities(enum.Enum):
@ -25,18 +26,24 @@ class FlowItem(BaseModel):
class Scheduler:
cache: DualCache
def __init__(self, polling_interval: Optional[float] = None):
def __init__(
self,
polling_interval: Optional[float] = None,
redis_cache: Optional[RedisCache] = None,
):
"""
polling_interval: float or null - frequency of polling the queue. Default is 0.03s (30ms).
"""
self.queue: list = []
self.cache = DualCache()
default_in_memory_ttl: Optional[float] = None
if redis_cache is not None:
# if redis-cache available frequently poll that instead of using in-memory.
default_in_memory_ttl = SchedulerCacheKeys.default_in_memory_ttl.value
self.cache = DualCache(
redis_cache=redis_cache, default_in_memory_ttl=default_in_memory_ttl
)
self.polling_interval = polling_interval or 0.03 # default to 0.03s (30ms)
def update_variables(self, cache: Optional[DualCache] = None):
if cache is not None:
self.cache = cache
async def add_request(self, request: FlowItem):
# We use the priority directly, as lower values indicate higher priority
# get the queue
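Usage of the new constructor, for context: with a `RedisCache` the queue is shared across workers and only held in memory for the 5s TTL defined in `SchedulerCacheKeys`; without one it stays purely in-process. A rough sketch (import paths and `RedisCache` arguments are assumptions, not taken from this diff):

```python
# Assumed import paths, based on this diff's file layout.
from litellm.caching import RedisCache
from litellm.scheduler import Scheduler

# Single process: the queue lives purely in the in-memory DualCache.
local_scheduler = Scheduler(polling_interval=0.03)

# Multiple workers: the queue is shared through Redis, and each worker's
# in-memory copy expires after SchedulerCacheKeys.default_in_memory_ttl (5s).
redis_cache = RedisCache(host="localhost", port=6379, password=None)
shared_scheduler = Scheduler(polling_interval=0.03, redis_cache=redis_cache)
```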

View file

@ -198,7 +198,11 @@ async def test_aarun_thread_litellm(sync_mode, provider, is_streaming):
)
assert isinstance(messages.data[0], Message)
else:
pytest.fail("An unexpected error occurred when running the thread")
pytest.fail(
"An unexpected error occurred when running the thread, {}".format(
run
)
)
else:
added_message = await litellm.a_add_message(**data)
@ -226,4 +230,8 @@ async def test_aarun_thread_litellm(sync_mode, provider, is_streaming):
)
assert isinstance(messages.data[0], Message)
else:
pytest.fail("An unexpected error occurred when running the thread")
pytest.fail(
"An unexpected error occurred when running the thread, {}".format(
run
)
)

View file

@ -2169,6 +2169,7 @@ def test_completion_azure_key_completion_arg():
logprobs=True,
max_tokens=10,
)
print(f"response: {response}")
print("Hidden Params", response._hidden_params)
@ -2544,6 +2545,8 @@ def test_replicate_custom_prompt_dict():
"content": "what is yc write 1 paragraph",
}
],
mock_response="Hello world",
mock_response="hello world",
repetition_penalty=0.1,
num_retries=3,
)

View file

@ -76,7 +76,7 @@ def test_image_generation_azure_dall_e_3():
)
print(f"response: {response}")
assert len(response.data) > 0
except litellm.RateLimitError as e:
except litellm.InternalServerError as e:
pass
except litellm.ContentPolicyViolationError:
pass # OpenAI randomly raises these errors - skip when they occur
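This matches the remapping later in this diff, where Azure "Internal server error" responses are raised as `litellm.InternalServerError` instead of a generic 500 `APIError`. A brief hedged sketch of what callers can now do:

```python
import litellm

try:
    response = litellm.completion(
        model="azure/my-deployment",  # illustrative deployment name
        messages=[{"role": "user", "content": "hello"}],
    )
except litellm.InternalServerError:
    # transient server-side failure: reasonable to retry or fail over
    pass
```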

View file

@ -102,18 +102,18 @@ async def test_get_available_deployments_custom_price():
@pytest.mark.asyncio
async def test_lowest_cost_routing():
"""
Test if router returns model with the lowest cost
Test if router, returns model with the lowest cost
"""
model_list = [
{
"model_name": "gpt-3.5-turbo",
"model_name": "gpt-4",
"litellm_params": {"model": "gpt-4"},
"model_info": {"id": "openai-gpt-4"},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {"model": "groq/llama3-8b-8192"},
"model_info": {"id": "groq-llama"},
"litellm_params": {"model": "gpt-3.5-turbo"},
"model_info": {"id": "gpt-3.5-turbo"},
},
]
@ -127,7 +127,7 @@ async def test_lowest_cost_routing():
print(
response._hidden_params["model_id"]
) # expect gpt-3.5-turbo, since it has the lowest cost
assert "groq-llama" == response._hidden_params["model_id"]
assert "gpt-3.5-turbo" == response._hidden_params["model_id"]
async def _deploy(lowest_cost_logger, deployment_id, tokens_used, duration):

View file

@ -38,6 +38,48 @@ def test_router_sensitive_keys():
assert "special-key" not in str(e)
def test_router_order():
"""
Asserts for 2 models in a model group, model with order=1 always called first
"""
router = Router(
model_list=[
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-4o",
"api_key": os.getenv("OPENAI_API_KEY"),
"mock_response": "Hello world",
"order": 1,
},
"model_info": {"id": "1"},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-4o",
"api_key": "bad-key",
"mock_response": Exception("this is a bad key"),
"order": 2,
},
"model_info": {"id": "2"},
},
],
num_retries=0,
allowed_fails=0,
enable_pre_call_checks=True,
)
for _ in range(100):
response = router.completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
assert isinstance(response, litellm.ModelResponse)
assert response._hidden_params["model_id"] == "1"
@pytest.mark.parametrize("num_retries", [None, 2])
@pytest.mark.parametrize("max_retries", [None, 4])
def test_router_num_retries_init(num_retries, max_retries):

View file

@ -186,3 +186,13 @@ def test_load_test_token_counter(model):
total_time = end_time - start_time
print("model={}, total test time={}".format(model, total_time))
assert total_time < 10, f"Total encoding time > 10s, {total_time}"
def test_openai_token_with_image_and_text():
model = "gpt-4o"
full_request = {
    "model": "gpt-4o",
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "json",
                "parameters": {
                    "type": "object",
                    "required": ["clause"],
                    "properties": {"clause": {"type": "string"}},
                },
                "description": "Respond with a JSON object.",
            },
        }
    ],
    "logprobs": False,
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "text": "\n Just some long text, long long text, and you know it will be longer than 7 tokens definetly.",
                    "type": "text",
                }
            ],
        }
    ],
    "tool_choice": {"type": "function", "function": {"name": "json"}},
    "exclude_models": [],
    "disable_fallback": False,
    "exclude_providers": [],
}
messages = full_request.get("messages", [])
token_count = token_counter(model=model, messages=messages)
print(token_count)
test_openai_token_with_image_and_text()
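The counter change this test exercises (see the `openai_token_counter` hunk later in this diff) encodes each `text` part of a multi-part message individually. A minimal sketch of that idea with `tiktoken`, assuming `cl100k_base` as a stand-in for the model's actual encoding:

```python
# Counting tokens per text part of a multi-part message, as the updated
# openai_token_counter does. cl100k_base is an assumed stand-in encoding.
import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")

messages = [
    {
        "role": "user",
        "content": [{"type": "text", "text": "Just some long text, long long text."}],
    }
]

num_tokens = 0
for message in messages:
    content = message.get("content")
    if isinstance(content, list):
        for part in content:
            if part["type"] == "text":
                num_tokens += len(
                    encoding.encode(part["text"], disallowed_special=())
                )

print(num_tokens)
```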

View file

@ -1374,8 +1374,12 @@ class Logging:
callback_func=callback,
)
except Exception as e:
traceback.print_exc()
print_verbose(
verbose_logger.error(
"litellm.Logging.pre_call(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}"
)
print_verbose(
@ -4062,6 +4066,7 @@ def openai_token_counter(
for c in value:
if c["type"] == "text":
text += c["text"]
num_tokens += len(encoding.encode(c["text"], disallowed_special=()))
elif c["type"] == "image_url":
if isinstance(c["image_url"], dict):
image_url_dict = c["image_url"]
@ -6194,6 +6199,27 @@ def calculate_max_parallel_requests(
return None
def _get_order_filtered_deployments(healthy_deployments: List[Dict]) -> List:
min_order = min(
(
deployment["litellm_params"]["order"]
for deployment in healthy_deployments
if "order" in deployment["litellm_params"]
),
default=None,
)
if min_order is not None:
filtered_deployments = [
deployment
for deployment in healthy_deployments
if deployment["litellm_params"].get("order") == min_order
]
return filtered_deployments
return healthy_deployments
def _get_model_region(
custom_llm_provider: str, litellm_params: LiteLLM_Params
) -> Optional[str]:
@ -7336,6 +7362,10 @@ def get_provider_fields(custom_llm_provider: str) -> List[ProviderField]:
if custom_llm_provider == "databricks":
return litellm.DatabricksConfig().get_required_params()
elif custom_llm_provider == "ollama":
return litellm.OllamaConfig().get_required_params()
else:
return []
@ -9782,8 +9812,7 @@ def exception_type(
elif custom_llm_provider == "azure":
if "Internal server error" in error_str:
exception_mapping_worked = True
raise APIError(
status_code=500,
raise litellm.InternalServerError(
message=f"AzureException Internal server error - {original_exception.message}",
llm_provider="azure",
model=model,
@ -10033,6 +10062,8 @@ def get_secret(
):
key_management_system = litellm._key_management_system
key_management_settings = litellm._key_management_settings
args = locals()
if secret_name.startswith("os.environ/"):
secret_name = secret_name.replace("os.environ/", "")
@ -10120,13 +10151,13 @@ def get_secret(
key_manager = "local"
if (
key_manager == KeyManagementSystem.AZURE_KEY_VAULT
key_manager == KeyManagementSystem.AZURE_KEY_VAULT.value
or type(client).__module__ + "." + type(client).__name__
== "azure.keyvault.secrets._client.SecretClient"
): # support Azure Secret Client - from azure.keyvault.secrets import SecretClient
secret = client.get_secret(secret_name).value
elif (
key_manager == KeyManagementSystem.GOOGLE_KMS
key_manager == KeyManagementSystem.GOOGLE_KMS.value
or client.__class__.__name__ == "KeyManagementServiceClient"
):
encrypted_secret: Any = os.getenv(secret_name)
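The `.value` fixes above matter because `key_manager` is read back as a plain string, and a Python `Enum` member never compares equal to its string value. A two-assert illustration:

```python
from enum import Enum


class KeyManagementSystem(Enum):
    AZURE_KEY_VAULT = "azure_key_vault"


key_manager = "azure_key_vault"  # loaded from settings as a plain string
assert key_manager != KeyManagementSystem.AZURE_KEY_VAULT  # Enum member: never equal
assert key_manager == KeyManagementSystem.AZURE_KEY_VAULT.value  # compares as intended
```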
@ -10154,6 +10185,25 @@ def get_secret(
secret = response.plaintext.decode(
"utf-8"
) # assumes the original value was encoded with utf-8
elif key_manager == KeyManagementSystem.AWS_KMS.value:
"""
Only check the tokens which start with 'aws_kms/'. This prevents latency impact caused by checking all keys.
"""
encrypted_value = os.getenv(secret_name, None)
if encrypted_value is None:
raise Exception("encrypted value for AWS KMS cannot be None.")
# Decode the base64 encoded ciphertext
ciphertext_blob = base64.b64decode(encrypted_value)
# Set up the parameters for the decrypt call
params = {"CiphertextBlob": ciphertext_blob}
# Perform the decryption
response = client.decrypt(**params)
# Extract and decode the plaintext
plaintext = response["Plaintext"]
secret = plaintext.decode("utf-8")
elif key_manager == KeyManagementSystem.AWS_SECRET_MANAGER.value:
try:
get_secret_value_response = client.get_secret_value(
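For the new `aws_kms` branch to have something to decrypt, the secret must be KMS-encrypted and base64-encoded into the environment ahead of time. A hedged round-trip sketch with boto3 (key alias, region, and variable names are placeholders):

```python
import base64
import os

import boto3

kms = boto3.client("kms", region_name="us-west-2")  # region is illustrative

# One-time setup, out of band: encrypt the secret and store the base64
# ciphertext in the environment variable the proxy will read.
encrypted = kms.encrypt(KeyId="alias/my-litellm-key", Plaintext=b"sk-secret-value")
os.environ["MY_SECRET"] = base64.b64encode(encrypted["CiphertextBlob"]).decode()

# What the new aws_kms branch does at read time.
ciphertext_blob = base64.b64decode(os.environ["MY_SECRET"])
response = kms.decrypt(CiphertextBlob=ciphertext_blob)
secret = response["Plaintext"].decode("utf-8")
assert secret == "sk-secret-value"
```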
@ -10174,10 +10224,14 @@ def get_secret(
for k, v in secret_dict.items():
secret = v
print_verbose(f"secret: {secret}")
elif key_manager == "local":
secret = os.getenv(secret_name)
else: # assume the default is infisical client
secret = client.get_secret(secret_name).secret_value
except Exception as e: # check if it's in os.environ
print_verbose(f"An exception occurred - {str(e)}")
verbose_logger.error(
f"An exception occurred - {str(e)}\n\n{traceback.format_exc()}"
)
secret = os.getenv(secret_name)
try:
secret_value_as_bool = ast.literal_eval(secret)
@ -10511,7 +10565,12 @@ class CustomStreamWrapper:
"finish_reason": finish_reason,
}
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.handle_predibase_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e
def handle_huggingface_chunk(self, chunk):
@ -10555,7 +10614,12 @@ class CustomStreamWrapper:
"finish_reason": finish_reason,
}
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.handle_huggingface_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e
def handle_ai21_chunk(self, chunk): # fake streaming
@ -10790,7 +10854,12 @@ class CustomStreamWrapper:
"usage": usage,
}
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.handle_openai_chat_completion_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e
def handle_azure_text_completion_chunk(self, chunk):
@ -10871,7 +10940,12 @@ class CustomStreamWrapper:
else:
return ""
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.handle_baseten_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
return ""
def handle_cloudlfare_stream(self, chunk):
@ -11070,7 +11144,12 @@ class CustomStreamWrapper:
"is_finished": True,
}
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.handle_clarifai_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
return ""
def model_response_creator(self):
@ -11557,7 +11636,12 @@ class CustomStreamWrapper:
tool["type"] = "function"
model_response.choices[0].delta = Delta(**_json_delta)
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.chunk_creator(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
model_response.choices[0].delta = Delta()
else:
try:

poetry.lock generated
View file

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]]
name = "aiohttp"
@ -2114,6 +2114,7 @@ files = [
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
{file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
{file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
{file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
{file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
@ -2121,8 +2122,15 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
{file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
{file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
{file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
{file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@ -2139,6 +2147,7 @@ files = [
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
{file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
{file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
{file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
{file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
@ -2146,6 +2155,7 @@ files = [
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
{file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
{file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
{file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
{file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
@ -3140,4 +3150,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0, !=3.9.7"
content-hash = "a54d969a1a707413e7cd3ce869d14ef73dd41bb9d36ebf0fb878d9e929bc15b3"
content-hash = "6a37992b63b11d254f5f40687bd96898b1d9515728f663f30dcc81c4ef8df7b7"

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.40.3"
version = "1.40.5"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -62,7 +62,8 @@ extra_proxy = [
"azure-identity",
"azure-keyvault-secrets",
"google-cloud-kms",
"resend"
"resend",
"pynacl"
]
[tool.poetry.scripts]
@ -79,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.40.3"
version = "1.40.5"
version_files = [
"pyproject.toml:^version"
]

View file

@ -243,4 +243,16 @@ model LiteLLM_InvitationLink {
liteLLM_user_table_user LiteLLM_UserTable @relation("UserId", fields: [user_id], references: [user_id])
liteLLM_user_table_created LiteLLM_UserTable @relation("CreatedBy", fields: [created_by], references: [user_id])
liteLLM_user_table_updated LiteLLM_UserTable @relation("UpdatedBy", fields: [updated_by], references: [user_id])
}
model LiteLLM_AuditLog {
id String @id @default(uuid())
updated_at DateTime @default(now())
changed_by String // user or system that performed the action
action String // create, update, delete
table_name String // one of LitellmTableNames.TEAM_TABLE_NAME, LitellmTableNames.USER_TABLE_NAME, LitellmTableNames.PROXY_MODEL_TABLE_NAME
object_id String // id of the object being audited. This can be the key id, team id, user id, model id
before_value Json? // value of the row before the change
updated_values Json? // value of the row after the change
}
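For reference, inserting a row into the new table via prisma-client-py would look roughly like this; the `litellm_auditlog` accessor and `Json` wrapper follow that client's conventions and are assumptions, not code from this diff:

```python
from prisma import Json, Prisma


async def record_team_update(db: Prisma, team_id: str, before: dict, after: dict):
    # Accessor name assumed from prisma-client-py's lowercasing of model names.
    await db.litellm_auditlog.create(
        data={
            "changed_by": "admin-ui",
            "action": "update",
            "table_name": "LiteLLM_TeamTable",
            "object_id": team_id,
            "before_value": Json(before),
            "updated_values": Json(after),
        }
    )
```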

View file

@ -145,6 +145,7 @@ enum Providers {
OpenAI_Compatible = "OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)",
Vertex_AI = "Vertex AI (Anthropic, Gemini, etc.)",
Databricks = "Databricks",
Ollama = "Ollama",
}
const provider_map: Record<string, string> = {
@ -156,6 +157,7 @@ const provider_map: Record<string, string> = {
OpenAI_Compatible: "openai",
Vertex_AI: "vertex_ai",
Databricks: "databricks",
Ollama: "ollama",
};
const retry_policy_map: Record<string, string> = {
@ -1747,6 +1749,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
)}
{selectedProvider != Providers.Bedrock &&
selectedProvider != Providers.Vertex_AI &&
selectedProvider != Providers.Ollama &&
(dynamicProviderForm === undefined ||
dynamicProviderForm.fields.length == 0) && (
<Form.Item