refactor: replace 'traceback.print_exc()' with logging library

Allows error logs to be emitted in JSON format for OTel logging.
Krrish Dholakia 2024-06-06 13:47:43 -07:00
parent 58bd2b4ea6
commit 6cca5612d2
41 changed files with 542 additions and 225 deletions
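The same pattern is applied across all 41 files: drop the bare traceback.print_exc() call (which prints an unformatted stack trace straight to stderr, bypassing any log formatter) and instead route the error through the shared logger, with the exception message at ERROR level and the full stack trace at DEBUG. A minimal sketch of that pattern, using a stdlib logger as a stand-in for the litellm verbose_logger used throughout this diff:

import logging
import traceback

# stand-in for litellm's shared logger; in this codebase it is imported as
# `from litellm import verbose_logger`
verbose_logger = logging.getLogger("LiteLLM")
logging.basicConfig(level=logging.DEBUG)

try:
    raise ValueError("boom")  # placeholder for the real work
except Exception as e:
    # the message goes through the logging pipeline, so a JSON formatter can pick it up
    verbose_logger.error("do_work(): Exception occurred - {}".format(str(e)))
    # keep the full stack trace at DEBUG so structured error logs stay compact
    verbose_logger.debug(traceback.format_exc())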

View file

@@ -18,10 +18,6 @@ async def log_event(request: Request):
         return {"message": "Request received successfully"}
     except Exception as e:
-        print(f"Error processing request: {str(e)}")
-        import traceback
-        traceback.print_exc()
         raise HTTPException(status_code=500, detail="Internal Server Error")

View file

@@ -120,6 +120,5 @@ class GenericAPILogger:
             )
             return response
         except Exception as e:
-            traceback.print_exc()
-            verbose_logger.debug(f"Generic - {str(e)}\n{traceback.format_exc()}")
+            verbose_logger.error(f"Generic - {str(e)}\n{traceback.format_exc()}")
             pass

View file

@@ -82,7 +82,7 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
         except HTTPException as e:
             raise e
         except Exception as e:
-            traceback.print_exc()
+            verbose_proxy_logger.error(traceback.format_exc())

     async def async_post_call_success_hook(
         self,

View file

@@ -118,4 +118,4 @@ class _ENTERPRISE_BlockedUserList(CustomLogger):
         except HTTPException as e:
             raise e
         except Exception as e:
-            traceback.print_exc()
+            verbose_proxy_logger.error(traceback.format_exc())

View file

@@ -92,7 +92,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
                 },
             )
         except Exception as e:
-            traceback.print_exc()
+            verbose_proxy_logger.error(traceback.format_exc())
             raise e

     def should_proceed(self, user_api_key_dict: UserAPIKeyAuth, data: dict) -> bool:

View file

@@ -1,5 +1,6 @@
 import logging, os, json
 from logging import Formatter
+import traceback

 set_verbose = False
 json_logs = bool(os.getenv("JSON_LOGS", False))

View file

@@ -253,7 +253,6 @@ class RedisCache(BaseCache):
                 str(e),
                 value,
             )
-            traceback.print_exc()
             raise e

     async def async_scan_iter(self, pattern: str, count: int = 100) -> list:
@@ -313,7 +312,6 @@ class RedisCache(BaseCache):
                 str(e),
                 value,
             )
-            traceback.print_exc()

         key = self.check_and_fix_namespace(key=key)
         async with _redis_client as redis_client:
@@ -352,7 +350,6 @@ class RedisCache(BaseCache):
                 str(e),
                 value,
             )
-            traceback.print_exc()

     async def async_set_cache_pipeline(self, cache_list, ttl=None):
         """
@@ -413,7 +410,6 @@ class RedisCache(BaseCache):
                 str(e),
                 cache_value,
             )
-            traceback.print_exc()

     async def batch_cache_write(self, key, value, **kwargs):
         print_verbose(
@@ -458,7 +454,6 @@ class RedisCache(BaseCache):
                 str(e),
                 value,
             )
-            traceback.print_exc()
             raise e

     async def flush_cache_buffer(self):
@@ -495,8 +490,9 @@ class RedisCache(BaseCache):
             return self._get_cache_logic(cached_response=cached_response)
         except Exception as e:
             # NON blocking - notify users Redis is throwing an exception
-            traceback.print_exc()
-            logging.debug("LiteLLM Caching: get() - Got exception from REDIS: ", e)
+            verbose_logger.error(
+                "LiteLLM Caching: get() - Got exception from REDIS: ", e
+            )

     def batch_get_cache(self, key_list) -> dict:
         """
@@ -646,10 +642,9 @@ class RedisCache(BaseCache):
                 error=e,
                 call_type="sync_ping",
             )
-            print_verbose(
+            verbose_logger.error(
                 f"LiteLLM Redis Cache PING: - Got exception from REDIS : {str(e)}"
             )
-            traceback.print_exc()
             raise e

     async def ping(self) -> bool:
@@ -683,10 +678,9 @@ class RedisCache(BaseCache):
                     call_type="async_ping",
                 )
             )
-            print_verbose(
+            verbose_logger.error(
                 f"LiteLLM Redis Cache PING: - Got exception from REDIS : {str(e)}"
             )
-            traceback.print_exc()
             raise e

     async def delete_cache_keys(self, keys):
@@ -1138,22 +1132,23 @@ class S3Cache(BaseCache):
                     cached_response = ast.literal_eval(cached_response)
                 if type(cached_response) is not dict:
                     cached_response = dict(cached_response)
-                print_verbose(
+                verbose_logger.debug(
                     f"Got S3 Cache: key: {key}, cached_response {cached_response}. Type Response {type(cached_response)}"
                 )

             return cached_response
         except botocore.exceptions.ClientError as e:
             if e.response["Error"]["Code"] == "NoSuchKey":
-                print_verbose(
+                verbose_logger.error(
                     f"S3 Cache: The specified key '{key}' does not exist in the S3 bucket."
                 )
                 return None
         except Exception as e:
             # NON blocking - notify users S3 is throwing an exception
-            traceback.print_exc()
-            print_verbose(f"S3 Caching: get_cache() - Got exception from S3: {e}")
+            verbose_logger.error(
+                f"S3 Caching: get_cache() - Got exception from S3: {e}"
+            )

     async def async_get_cache(self, key, **kwargs):
         return self.get_cache(key=key, **kwargs)
@@ -1234,8 +1229,7 @@ class DualCache(BaseCache):
             return result
         except Exception as e:
-            print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
-            traceback.print_exc()
+            verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
             raise e

     def get_cache(self, key, local_only: bool = False, **kwargs):
@@ -1262,7 +1256,7 @@ class DualCache(BaseCache):
             print_verbose(f"get cache: cache result: {result}")
             return result
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(traceback.format_exc())

     def batch_get_cache(self, keys: list, local_only: bool = False, **kwargs):
         try:
@@ -1295,7 +1289,7 @@ class DualCache(BaseCache):
             print_verbose(f"async batch get cache: cache result: {result}")
             return result
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(traceback.format_exc())

     async def async_get_cache(self, key, local_only: bool = False, **kwargs):
         # Try to fetch from in-memory cache first
@@ -1328,7 +1322,7 @@ class DualCache(BaseCache):
             print_verbose(f"get cache: cache result: {result}")
             return result
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(traceback.format_exc())

     async def async_batch_get_cache(
         self, keys: list, local_only: bool = False, **kwargs
@@ -1368,7 +1362,7 @@ class DualCache(BaseCache):
             return result
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(traceback.format_exc())

     async def async_set_cache(self, key, value, local_only: bool = False, **kwargs):
         print_verbose(
@@ -1381,8 +1375,8 @@ class DualCache(BaseCache):
             if self.redis_cache is not None and local_only == False:
                 await self.redis_cache.async_set_cache(key, value, **kwargs)
         except Exception as e:
-            print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
-            traceback.print_exc()
+            verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
+            verbose_logger.debug(traceback.format_exc())

     async def async_batch_set_cache(
         self, cache_list: list, local_only: bool = False, **kwargs
@@ -1404,8 +1398,8 @@ class DualCache(BaseCache):
                 cache_list=cache_list, ttl=kwargs.get("ttl", None)
             )
         except Exception as e:
-            print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
-            traceback.print_exc()
+            verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
+            verbose_logger.debug(traceback.format_exc())

     async def async_increment_cache(
         self, key, value: float, local_only: bool = False, **kwargs
@@ -1429,8 +1423,8 @@ class DualCache(BaseCache):
             return result
         except Exception as e:
-            print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
-            traceback.print_exc()
+            verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
+            verbose_logger.debug(traceback.format_exc())
             raise e

     def flush_cache(self):
@@ -1846,8 +1840,8 @@ class Cache:
             )
             self.cache.set_cache(cache_key, cached_data, **kwargs)
         except Exception as e:
-            print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
-            traceback.print_exc()
+            verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
+            verbose_logger.debug(traceback.format_exc())
             pass

     async def async_add_cache(self, result, *args, **kwargs):
@@ -1864,8 +1858,8 @@ class Cache:
             )
             await self.cache.async_set_cache(cache_key, cached_data, **kwargs)
         except Exception as e:
-            print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
-            traceback.print_exc()
+            verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
+            verbose_logger.debug(traceback.format_exc())

     async def async_add_cache_pipeline(self, result, *args, **kwargs):
         """
@@ -1897,8 +1891,8 @@ class Cache:
                 )
             await asyncio.gather(*tasks)
         except Exception as e:
-            print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
-            traceback.print_exc()
+            verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
+            verbose_logger.debug(traceback.format_exc())

     async def batch_cache_write(self, result, *args, **kwargs):
         cache_key, cached_data, kwargs = self._add_cache_logic(

View file

@@ -169,6 +169,5 @@ class AISpendLogger:
             print_verbose(f"AISpend Logging - final data object: {data}")
         except:
-            # traceback.print_exc()
             print_verbose(f"AISpend Logging Error - {traceback.format_exc()}")
             pass

View file

@@ -178,6 +178,5 @@ class BerriSpendLogger:
             print_verbose(f"BerriSpend Logging - final data object: {data}")
             response = requests.post(url, headers=headers, json=data)
         except:
-            # traceback.print_exc()
             print_verbose(f"BerriSpend Logging Error - {traceback.format_exc()}")
             pass

View file

@@ -297,6 +297,5 @@ class ClickhouseLogger:
             # make request to endpoint with payload
             verbose_logger.debug(f"Clickhouse Logger - final response = {response}")
         except Exception as e:
-            traceback.print_exc()
             verbose_logger.debug(f"Clickhouse - {str(e)}\n{traceback.format_exc()}")
             pass

View file

@@ -115,7 +115,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
             )
             print_verbose(f"Custom Logger - model call details: {kwargs}")
         except:
-            traceback.print_exc()
             print_verbose(f"Custom Logger Error - {traceback.format_exc()}")

     async def async_log_input_event(
@@ -130,7 +129,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
             )
             print_verbose(f"Custom Logger - model call details: {kwargs}")
         except:
-            traceback.print_exc()
             print_verbose(f"Custom Logger Error - {traceback.format_exc()}")

     def log_event(
@@ -146,7 +144,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
                 end_time,
             )
         except:
-            # traceback.print_exc()
             print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
             pass
@@ -163,6 +160,5 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
                 end_time,
             )
         except:
-            # traceback.print_exc()
             print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
             pass

View file

@@ -134,7 +134,6 @@ class DataDogLogger:
                 f"Datadog Layer Logging - final response object: {response_obj}"
             )
         except Exception as e:
-            traceback.print_exc()
             verbose_logger.debug(
                 f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}"
             )

View file

@@ -85,6 +85,5 @@ class DyanmoDBLogger:
             )
             return response
         except:
-            traceback.print_exc()
             print_verbose(f"DynamoDB Layer Error - {traceback.format_exc()}")
             pass

View file

@@ -112,6 +112,5 @@ class HeliconeLogger:
             )
             print_verbose(f"Helicone Logging - Error {response.text}")
         except:
-            # traceback.print_exc()
             print_verbose(f"Helicone Logging Error - {traceback.format_exc()}")
             pass

View file

@@ -78,15 +78,21 @@ class LangFuseLogger:
         For example if you want to append your trace to an existing `trace_id` via header, send
         `headers: { ..., langfuse_existing_trace_id: your-existing-trace-id }` via proxy request.
         """
-        proxy_headers = litellm_params.get("proxy_server_request", {}).get("headers", {})
+        proxy_headers = litellm_params.get("proxy_server_request", {}).get(
+            "headers", {}
+        )

         for metadata_param_key in proxy_headers:
             if metadata_param_key.startswith("langfuse_"):
                 trace_param_key = metadata_param_key.replace("langfuse_", "", 1)
                 if trace_param_key in metadata:
-                    verbose_logger.warning(f"Overwriting Langfuse `{trace_param_key}` from request header")
+                    verbose_logger.warning(
+                        f"Overwriting Langfuse `{trace_param_key}` from request header"
+                    )
                 else:
-                    verbose_logger.debug(f"Found Langfuse `{trace_param_key}` in request header")
+                    verbose_logger.debug(
+                        f"Found Langfuse `{trace_param_key}` in request header"
+                    )
                 metadata[trace_param_key] = proxy_headers.get(metadata_param_key)

         return metadata
@@ -205,9 +211,11 @@ class LangFuseLogger:
             verbose_logger.info(f"Langfuse Layer Logging - logging success")
             return {"trace_id": trace_id, "generation_id": generation_id}
-        except:
-            traceback.print_exc()
-            verbose_logger.debug(f"Langfuse Layer Error - {traceback.format_exc()}")
+        except Exception as e:
+            verbose_logger.error(
+                "Langfuse Layer Error(): Exception occured - {}".format(str(e))
+            )
+            verbose_logger.debug(traceback.format_exc())
             return {"trace_id": None, "generation_id": None}

     async def _async_log_event(

View file

@@ -44,7 +44,9 @@ class LangsmithLogger:
         print_verbose(
             f"Langsmith Logging - project_name: {project_name}, run_name {run_name}"
         )
-        langsmith_base_url = os.getenv("LANGSMITH_BASE_URL", "https://api.smith.langchain.com")
+        langsmith_base_url = os.getenv(
+            "LANGSMITH_BASE_URL", "https://api.smith.langchain.com"
+        )

         try:
             print_verbose(
@@ -89,9 +91,7 @@ class LangsmithLogger:
             }

             url = f"{langsmith_base_url}/runs"
-            print_verbose(
-                f"Langsmith Logging - About to send data to {url} ..."
-            )
+            print_verbose(f"Langsmith Logging - About to send data to {url} ...")

             response = requests.post(
                 url=url,
                 json=data,
@@ -106,6 +106,5 @@ class LangsmithLogger:
                 f"Langsmith Layer Logging - final response object: {response_obj}"
             )
         except:
-            # traceback.print_exc()
             print_verbose(f"Langsmith Layer Error - {traceback.format_exc()}")
             pass

View file

@@ -171,7 +171,6 @@ class LogfireLogger:
                 f"Logfire Layer Logging - final response object: {response_obj}"
             )
         except Exception as e:
-            traceback.print_exc()
             verbose_logger.debug(
                 f"Logfire Layer Error - {str(e)}\n{traceback.format_exc()}"
             )

View file

@@ -14,6 +14,7 @@ def parse_usage(usage):
         "prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0,
     }

+
 def parse_tool_calls(tool_calls):
     if tool_calls is None:
         return None
@@ -26,7 +27,7 @@ def parse_tool_calls(tool_calls):
             "function": {
                 "name": tool_call.function.name,
                 "arguments": tool_call.function.arguments,
-            }
+            },
         }

         return serialized
@@ -176,6 +177,5 @@ class LunaryLogger:
             )
         except:
-            # traceback.print_exc()
             print_verbose(f"Lunary Logging Error - {traceback.format_exc()}")
             pass

View file

@@ -109,8 +109,8 @@ class PrometheusLogger:
                 end_user_id, user_api_key, model, user_api_team, user_id
             ).inc()
         except Exception as e:
-            traceback.print_exc()
-            verbose_logger.debug(
-                f"prometheus Layer Error - {str(e)}\n{traceback.format_exc()}"
+            verbose_logger.error(
+                "prometheus Layer Error(): Exception occured - {}".format(str(e))
             )
+            verbose_logger.debug(traceback.format_exc())
             pass

View file

@@ -180,6 +180,5 @@ class S3Logger:
             print_verbose(f"s3 Layer Logging - final response object: {response_obj}")
             return response
         except Exception as e:
-            traceback.print_exc()
             verbose_logger.debug(f"s3 Layer Error - {str(e)}\n{traceback.format_exc()}")
             pass

View file

@@ -110,6 +110,5 @@ class Supabase:
             )
         except:
-            # traceback.print_exc()
             print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
             pass

View file

@@ -217,6 +217,5 @@ class WeightsBiasesLogger:
                 f"W&B Logging Logging - final response object: {response_obj}"
             )
         except:
-            # traceback.print_exc()
             print_verbose(f"W&B Logging Layer Error - {traceback.format_exc()}")
             pass

View file

@@ -1,13 +1,14 @@
-import os, types, traceback, copy, asyncio
-import json
-from enum import Enum
+import types
+import traceback
+import copy
 import time
 from typing import Callable, Optional
-from litellm.utils import ModelResponse, get_secret, Choices, Message, Usage
+from litellm.utils import ModelResponse, Choices, Message, Usage
 import litellm
-import sys, httpx
+import httpx
 from .prompt_templates.factory import prompt_factory, custom_prompt, get_system_prompt
 from packaging.version import Version
+from litellm import verbose_logger


 class GeminiError(Exception):
@@ -264,7 +265,8 @@ def completion(
             choices_list.append(choice_obj)
         model_response["choices"] = choices_list
     except Exception as e:
-        traceback.print_exc()
+        verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e)))
+        verbose_logger.debug(traceback.format_exc())
         raise GeminiError(
             message=traceback.format_exc(), status_code=response.status_code
         )
@@ -356,7 +358,8 @@ async def async_completion(
             choices_list.append(choice_obj)
         model_response["choices"] = choices_list
     except Exception as e:
-        traceback.print_exc()
+        verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e)))
+        verbose_logger.debug(traceback.format_exc())
         raise GeminiError(
             message=traceback.format_exc(), status_code=response.status_code
         )

View file

@@ -6,6 +6,7 @@ from typing import Optional
 import litellm
 import httpx, aiohttp, asyncio  # type: ignore
 from .prompt_templates.factory import prompt_factory, custom_prompt
+from litellm import verbose_logger


 class OllamaError(Exception):
@@ -124,6 +125,7 @@ class OllamaConfig:
             )
             and v is not None
         }
+
     def get_supported_openai_params(
         self,
     ):
@@ -138,10 +140,12 @@ class OllamaConfig:
             "response_format",
         ]

+
 # ollama wants plain base64 jpeg/png files as images. strip any leading dataURI
 # and convert to jpeg if necessary.
 def _convert_image(image):
     import base64, io
+
     try:
         from PIL import Image
     except:
@@ -391,7 +395,13 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
         async for transformed_chunk in streamwrapper:
             yield transformed_chunk
     except Exception as e:
-        traceback.print_exc()
+        verbose_logger.error(
+            "LiteLLM.ollama.py::ollama_async_streaming(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_logger.debug(traceback.format_exc())
         raise e
@@ -455,7 +465,12 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
         )
         return model_response
     except Exception as e:
-        traceback.print_exc()
+        verbose_logger.error(
+            "LiteLLM.ollama.py::ollama_acompletion(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_logger.debug(traceback.format_exc())
         raise e

View file

@@ -1,11 +1,15 @@
 from itertools import chain
-import requests, types, time
-import json, uuid
+import requests
+import types
+import time
+import json
+import uuid
 import traceback
 from typing import Optional
+from litellm import verbose_logger
 import litellm
-import httpx, aiohttp, asyncio
-from .prompt_templates.factory import prompt_factory, custom_prompt
+import httpx
+import aiohttp


 class OllamaError(Exception):
@@ -299,7 +303,10 @@ def get_ollama_response(
             tool_calls=[
                 {
                     "id": f"call_{str(uuid.uuid4())}",
-                    "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                    "function": {
+                        "name": function_call["name"],
+                        "arguments": json.dumps(function_call["arguments"]),
+                    },
                     "type": "function",
                 }
             ],
@@ -307,7 +314,9 @@ def get_ollama_response(
         model_response["choices"][0]["message"] = message
         model_response["choices"][0]["finish_reason"] = "tool_calls"
     else:
-        model_response["choices"][0]["message"]["content"] = response_json["message"]["content"]
+        model_response["choices"][0]["message"]["content"] = response_json["message"][
+            "content"
+        ]
     model_response["created"] = int(time.time())
     model_response["model"] = "ollama/" + model
     prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=messages))  # type: ignore
@@ -361,7 +370,10 @@ def ollama_completion_stream(url, api_key, data, logging_obj):
             tool_calls=[
                 {
                     "id": f"call_{str(uuid.uuid4())}",
-                    "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                    "function": {
+                        "name": function_call["name"],
+                        "arguments": json.dumps(function_call["arguments"]),
+                    },
                     "type": "function",
                 }
             ],
@@ -410,9 +422,10 @@ async def ollama_async_streaming(
             first_chunk_content = first_chunk.choices[0].delta.content or ""
             response_content = first_chunk_content + "".join(
                 [
                     chunk.choices[0].delta.content
                     async for chunk in streamwrapper
-                    if chunk.choices[0].delta.content]
+                    if chunk.choices[0].delta.content
+                ]
             )
             function_call = json.loads(response_content)
             delta = litellm.utils.Delta(
@@ -420,7 +433,10 @@ async def ollama_async_streaming(
                 tool_calls=[
                     {
                         "id": f"call_{str(uuid.uuid4())}",
-                        "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                        "function": {
+                            "name": function_call["name"],
+                            "arguments": json.dumps(function_call["arguments"]),
+                        },
                         "type": "function",
                     }
                 ],
@@ -433,7 +449,8 @@ async def ollama_async_streaming(
         async for transformed_chunk in streamwrapper:
             yield transformed_chunk
     except Exception as e:
-        traceback.print_exc()
+        verbose_logger.error("LiteLLM.gemini(): Exception occured - {}".format(str(e)))
+        verbose_logger.debug(traceback.format_exc())


 async def ollama_acompletion(
@@ -483,7 +500,10 @@ async def ollama_acompletion(
                 tool_calls=[
                     {
                         "id": f"call_{str(uuid.uuid4())}",
-                        "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                        "function": {
+                            "name": function_call["name"],
+                            "arguments": json.dumps(function_call["arguments"]),
+                        },
                         "type": "function",
                     }
                 ],
@@ -491,7 +511,9 @@ async def ollama_acompletion(
             model_response["choices"][0]["message"] = message
             model_response["choices"][0]["finish_reason"] = "tool_calls"
         else:
-            model_response["choices"][0]["message"]["content"] = response_json["message"]["content"]
+            model_response["choices"][0]["message"]["content"] = response_json[
+                "message"
+            ]["content"]
         model_response["created"] = int(time.time())
         model_response["model"] = "ollama_chat/" + data["model"]
@@ -509,5 +531,9 @@ async def ollama_acompletion(
             )
         return model_response
     except Exception as e:
-        traceback.print_exc()
+        verbose_logger.error(
+            "LiteLLM.ollama_acompletion(): Exception occured - {}".format(str(e))
+        )
+        verbose_logger.debug(traceback.format_exc())
         raise e

View file

@@ -1,11 +1,12 @@
-import os, types, traceback, copy
-import json
-from enum import Enum
+import types
+import traceback
+import copy
 import time
 from typing import Callable, Optional
-from litellm.utils import ModelResponse, get_secret, Choices, Message, Usage
+from litellm.utils import ModelResponse, Choices, Message, Usage
 import litellm
-import sys, httpx
+import httpx
+from litellm import verbose_logger


 class PalmError(Exception):
@@ -165,7 +166,10 @@ def completion(
             choices_list.append(choice_obj)
         model_response["choices"] = choices_list
     except Exception as e:
-        traceback.print_exc()
+        verbose_logger.error(
+            "litellm.llms.palm.py::completion(): Exception occured - {}".format(str(e))
+        )
+        verbose_logger.debug(traceback.format_exc())
         raise PalmError(
             message=traceback.format_exc(), status_code=response.status_code
         )

View file

@@ -364,7 +364,10 @@ async def acompletion(
         )  # sets the logging event loop if the user does sync streaming (e.g. on proxy for sagemaker calls)
         return response
     except Exception as e:
-        traceback.print_exc()
+        verbose_logger.error(
+            "litellm.acompletion(): Exception occured - {}".format(str(e))
+        )
+        verbose_logger.debug(traceback.format_exc())
         custom_llm_provider = custom_llm_provider or "openai"
         raise exception_type(
             model=model,
@@ -477,7 +480,10 @@ def mock_completion(
     except Exception as e:
         if isinstance(e, openai.APIError):
             raise e
-        traceback.print_exc()
+        verbose_logger.error(
+            "litellm.mock_completion(): Exception occured - {}".format(str(e))
+        )
+        verbose_logger.debug(traceback.format_exc())
         raise Exception("Mock completion response failed")
@@ -4430,7 +4436,10 @@ async def ahealth_check(
         response = {}  # args like remaining ratelimit etc.
         return response
     except Exception as e:
-        traceback.print_exc()
+        verbose_logger.error(
+            "litellm.ahealth_check(): Exception occured - {}".format(str(e))
+        )
+        verbose_logger.debug(traceback.format_exc())
         stack_trace = traceback.format_exc()
         if isinstance(stack_trace, str):
             stack_trace = stack_trace[:1000]

View file

@@ -1,6 +1,7 @@
 import json
 import logging
 from logging import Formatter
+import sys


 class JsonFormatter(Formatter):
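The hunk above only touches the imports around the proxy's JsonFormatter; the class body is not shown in this commit. For orientation, a logging.Formatter that emits one JSON object per record typically looks like the sketch below (the class name, field names, and wiring here are illustrative assumptions, not the litellm implementation):

import json
import logging
from logging import Formatter


class ExampleJsonFormatter(Formatter):
    # illustrative sketch only - not the litellm implementation
    def format(self, record: logging.LogRecord) -> str:
        payload = {
            "message": record.getMessage(),
            "level": record.levelname,
            "timestamp": self.formatTime(record, self.datefmt),
        }
        return json.dumps(payload)


# usage: attach to a handler so logger.error(...) lines come out as JSON
handler = logging.StreamHandler()
handler.setFormatter(ExampleJsonFormatter())
logger = logging.getLogger("LiteLLM")
logger.addHandler(handler)
logger.error("something failed")  # -> {"message": "something failed", "level": "ERROR", ...}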

View file

@@ -88,7 +88,7 @@ class _PROXY_AzureContentSafety(
             verbose_proxy_logger.debug(
                 "Error in Azure Content-Safety: %s", traceback.format_exc()
             )
-            traceback.print_exc()
+            verbose_proxy_logger.debug(traceback.format_exc())
             raise

         result = self._compute_result(response)
@@ -123,7 +123,12 @@ class _PROXY_AzureContentSafety(
         except HTTPException as e:
             raise e
         except Exception as e:
-            traceback.print_exc()
+            verbose_proxy_logger.error(
+                "litellm.proxy.hooks.azure_content_safety.py::async_pre_call_hook(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_proxy_logger.debug(traceback.format_exc())

     async def async_post_call_success_hook(
         self,

View file

@@ -94,7 +94,12 @@ class _PROXY_BatchRedisRequests(CustomLogger):
         except HTTPException as e:
             raise e
         except Exception as e:
-            traceback.print_exc()
+            verbose_proxy_logger.error(
+                "litellm.proxy.hooks.batch_redis_get.py::async_pre_call_hook(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_proxy_logger.debug(traceback.format_exc())

     async def async_get_cache(self, *args, **kwargs):
         """

View file

@@ -1,13 +1,13 @@
 # What this does?
 ## Checks if key is allowed to use the cache controls passed in to the completion() call

-from typing import Optional
 import litellm
+from litellm import verbose_logger
 from litellm.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException
-import json, traceback
+import traceback


 class _PROXY_CacheControlCheck(CustomLogger):
@@ -54,4 +54,9 @@ class _PROXY_CacheControlCheck(CustomLogger):
         except HTTPException as e:
             raise e
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.proxy.hooks.cache_control_check.py::async_pre_call_hook(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())

View file

@@ -1,10 +1,10 @@
-from typing import Optional
+from litellm import verbose_logger
 import litellm
 from litellm.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException
-import json, traceback
+import traceback


 class _PROXY_MaxBudgetLimiter(CustomLogger):
@@ -44,4 +44,9 @@ class _PROXY_MaxBudgetLimiter(CustomLogger):
         except HTTPException as e:
             raise e
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.proxy.hooks.max_budget_limiter.py::async_pre_call_hook(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())

View file

@@ -8,8 +8,8 @@
 # Tell us how we can improve! - Krrish & Ishaan

-from typing import Optional, Literal, Union
-import litellm, traceback, sys, uuid, json
+from typing import Optional, Union
+import litellm, traceback, uuid, json  # noqa: E401
 from litellm.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
@@ -21,8 +21,8 @@ from litellm.utils import (
     ImageResponse,
     StreamingChoices,
 )
-from datetime import datetime
-import aiohttp, asyncio
+import aiohttp
+import asyncio


 class _OPTIONAL_PresidioPIIMasking(CustomLogger):
@@ -138,7 +138,12 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
             else:
                 raise Exception(f"Invalid anonymizer response: {redacted_text}")
         except Exception as e:
-            traceback.print_exc()
+            verbose_proxy_logger.error(
+                "litellm.proxy.hooks.presidio_pii_masking.py::async_pre_call_hook(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_proxy_logger.debug(traceback.format_exc())
             raise e

     async def async_pre_call_hook(

View file

@@ -204,7 +204,12 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger):
                 return e.detail["error"]
             raise e
         except Exception as e:
-            traceback.print_exc()
+            verbose_proxy_logger.error(
+                "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_proxy_logger.debug(traceback.format_exc())

     async def async_moderation_hook(
         self,

View file

@@ -125,7 +125,10 @@ from litellm.router import (
     AssistantsTypedDict,
 )
 from litellm.router import ModelInfo as RouterModelInfo
-from litellm._logging import verbose_router_logger, verbose_proxy_logger
+from litellm._logging import (
+    verbose_router_logger,
+    verbose_proxy_logger,
+)
 from litellm.proxy.auth.handle_jwt import JWTHandler
 from litellm.proxy.auth.litellm_license import LicenseCheck
 from litellm.proxy.auth.model_checks import (
@@ -1471,7 +1474,12 @@ async def user_api_key_auth(
         else:
             raise Exception()
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.user_api_key_auth(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, litellm.BudgetExceededError):
             raise ProxyException(
                 message=e.message, type="auth_error", param=None, code=400
@@ -3476,7 +3484,12 @@ async def generate_key_helper_fn(
         )
         key_data["token_id"] = getattr(create_key_response, "token", None)
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.generate_key_helper_fn(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise e
         raise HTTPException(
@@ -3515,7 +3528,12 @@ async def delete_verification_token(tokens: List, user_id: Optional[str] = None)
         else:
             raise Exception("DB not connected. prisma_client is None")
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.delete_verification_token(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         raise e
     return deleted_tokens
@@ -3676,7 +3694,12 @@ async def async_assistants_data_generator(
         done_message = "[DONE]"
         yield f"data: {done_message}\n\n"
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.async_assistants_data_generator(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict,
             original_exception=e,
@@ -3686,9 +3709,6 @@ async def async_assistants_data_generator(
             f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
         )
         router_model_names = llm_router.model_names if llm_router is not None else []
-        if user_debug:
-            traceback.print_exc()

         if isinstance(e, HTTPException):
             raise e
         else:
@@ -3728,7 +3748,12 @@ async def async_data_generator(
         done_message = "[DONE]"
         yield f"data: {done_message}\n\n"
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.async_data_generator(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict,
             original_exception=e,
@@ -3738,8 +3763,6 @@ async def async_data_generator(
             f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
         )
         router_model_names = llm_router.model_names if llm_router is not None else []
-        if user_debug:
-            traceback.print_exc()

         if isinstance(e, HTTPException):
             raise e
@@ -4386,7 +4409,12 @@ async def chat_completion(
         return _chat_response
     except Exception as e:
         data["litellm_status"] = "fail"  # used for alerting
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.chat_completion(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
@@ -4397,8 +4425,6 @@ async def chat_completion(
             litellm_debug_info,
         )
         router_model_names = llm_router.model_names if llm_router is not None else []
-        if user_debug:
-            traceback.print_exc()

         if isinstance(e, HTTPException):
             raise ProxyException(
@@ -4630,15 +4656,12 @@ async def completion(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        verbose_proxy_logger.debug("EXCEPTION RAISED IN PROXY MAIN.PY")
-        litellm_debug_info = getattr(e, "litellm_debug_info", "")
-        verbose_proxy_logger.debug(
-            "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
-            e,
-            litellm_debug_info,
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.completion(): Exception occured - {}".format(
+                str(e)
+            )
         )
-        traceback.print_exc()
-        error_traceback = traceback.format_exc()
+        verbose_proxy_logger.debug(traceback.format_exc())
         error_msg = f"{str(e)}"
         raise ProxyException(
             message=getattr(e, "message", error_msg),
@@ -4848,7 +4871,12 @@ async def embeddings(
             e,
             litellm_debug_info,
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.embeddings(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e)),
@@ -5027,7 +5055,12 @@ async def image_generation(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.image_generation(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e)),
@@ -5205,7 +5238,12 @@ async def audio_speech(
         )

     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.audio_speech(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         raise e
@@ -5394,7 +5432,12 @@ async def audio_transcriptions(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.audio_transcription(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),
@@ -5403,7 +5446,6 @@ async def audio_transcriptions(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),
@@ -5531,7 +5573,12 @@ async def get_assistants(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.get_assistants(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),
@@ -5540,7 +5587,6 @@ async def get_assistants(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),
@@ -5660,7 +5706,12 @@ async def create_threads(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.create_threads(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),
@@ -5669,7 +5720,6 @@ async def create_threads(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),
@@ -5788,7 +5838,12 @@ async def get_thread(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.get_thread(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),
@@ -5797,7 +5852,6 @@ async def get_thread(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),
@@ -5919,7 +5973,12 @@ async def add_messages(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.add_messages(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),
@@ -5928,7 +5987,6 @@ async def add_messages(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),
@@ -6046,7 +6104,12 @@ async def get_messages(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.get_messages(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),
@@ -6055,7 +6118,6 @@ async def get_messages(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),
@@ -6187,7 +6249,12 @@ async def run_thread(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.run_thread(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),
@@ -6196,7 +6263,6 @@ async def run_thread(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),
@@ -6335,7 +6401,12 @@ async def create_batch(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.create_batch(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),
@@ -6344,7 +6415,6 @@ async def create_batch(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),
@ -6478,7 +6548,12 @@ async def retrieve_batch(
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.retrieve_batch(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e.detail)), message=getattr(e, "message", str(e.detail)),
@ -6631,7 +6706,12 @@ async def create_file(
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.create_file(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e.detail)), message=getattr(e, "message", str(e.detail)),
@ -6640,7 +6720,6 @@ async def create_file(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
) )
else: else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}" error_msg = f"{str(e)}"
raise ProxyException( raise ProxyException(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
@ -6816,7 +6895,12 @@ async def moderations(
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.moderations(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e)), message=getattr(e, "message", str(e)),
@ -6825,7 +6909,6 @@ async def moderations(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
) )
else: else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}" error_msg = f"{str(e)}"
raise ProxyException( raise ProxyException(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
@ -7136,7 +7219,12 @@ async def generate_key_fn(
return GenerateKeyResponse(**response) return GenerateKeyResponse(**response)
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.generate_key_fn(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -9591,7 +9679,12 @@ async def user_info(
} }
return response_data return response_data
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_info(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -9686,7 +9779,12 @@ async def user_update(data: UpdateUserRequest):
return response return response
# update based on remaining passed in values # update based on remaining passed in values
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_update(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -9739,7 +9837,12 @@ async def user_request_model(request: Request):
return {"status": "success"} return {"status": "success"}
# update based on remaining passed in values # update based on remaining passed in values
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_request_model(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -9781,7 +9884,12 @@ async def user_get_requests():
return {"requests": response} return {"requests": response}
# update based on remaining passed in values # update based on remaining passed in values
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_get_requests(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -10171,7 +10279,12 @@ async def update_end_user(
# update based on remaining passed in values # update based on remaining passed in values
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.update_end_user(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Internal Server Error({str(e)})"), message=getattr(e, "detail", f"Internal Server Error({str(e)})"),
@ -10255,7 +10368,12 @@ async def delete_end_user(
# update based on remaining passed in values # update based on remaining passed in values
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.delete_end_user(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Internal Server Error({str(e)})"), message=getattr(e, "detail", f"Internal Server Error({str(e)})"),
@ -11558,7 +11676,12 @@ async def add_new_model(
return model_response return model_response
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.add_new_model(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -11672,7 +11795,12 @@ async def update_model(
return model_response return model_response
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.update_model(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -13906,7 +14034,12 @@ async def update_config(config_info: ConfigYAML):
return {"message": "Config updated successfully"} return {"message": "Config updated successfully"}
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.update_config(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -14379,7 +14512,12 @@ async def get_config():
"available_callbacks": all_available_callbacks, "available_callbacks": all_available_callbacks,
} }
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.get_config(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -14630,7 +14768,12 @@ async def health_services_endpoint(
} }
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.health_services_endpoint(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -14709,7 +14852,12 @@ async def health_endpoint(
"unhealthy_count": len(unhealthy_endpoints), "unhealthy_count": len(unhealthy_endpoints),
} }
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.py::health_endpoint(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e raise e
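
The change repeated across these proxy endpoints follows a single pattern: a one-line exception summary is logged at ERROR level with the fully qualified call site, and the full traceback is logged separately at DEBUG level, so routine error records stay compact while verbose runs still capture the stack. A minimal sketch of that pattern, assuming only that verbose_proxy_logger is the standard logging.Logger exposed by litellm._logging, with a hypothetical endpoint name used for illustration:

import traceback

from litellm._logging import verbose_proxy_logger  # a plain logging.Logger


def example_endpoint():
    # hypothetical endpoint body, present only so the handler below has something to catch
    raise ValueError("boom")


try:
    example_endpoint()
except Exception as e:
    # one-line summary at ERROR level; this is what a structured/JSON log pipeline ingests
    verbose_proxy_logger.error(
        "litellm.proxy.proxy_server.example_endpoint(): Exception occurred - {}".format(str(e))
    )
    # the stack trace is only emitted when the logger's level is DEBUG
    verbose_proxy_logger.debug(traceback.format_exc())

To see the traceback locally, the logger level can be lowered with verbose_proxy_logger.setLevel(logging.DEBUG); at any level above DEBUG only the ERROR line is emitted.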

View file

@ -2096,8 +2096,8 @@ class Router:
except Exception as e: except Exception as e:
raise e raise e
except Exception as e: except Exception as e:
verbose_router_logger.debug(f"An exception occurred - {str(e)}") verbose_router_logger.error(f"An exception occurred - {str(e)}")
traceback.print_exc() verbose_router_logger.debug(traceback.format_exc())
raise original_exception raise original_exception
async def async_function_with_retries(self, *args, **kwargs): async def async_function_with_retries(self, *args, **kwargs):

View file

@ -1,11 +1,9 @@
#### What this does #### #### What this does ####
# picks based on response time (for streaming, this is time to first token) # picks based on response time (for streaming, this is time to first token)
from pydantic import BaseModel, Extra, Field, root_validator from pydantic import BaseModel
import os, requests, random # type: ignore
from typing import Optional, Union, List, Dict from typing import Optional, Union, List, Dict
from datetime import datetime, timedelta from datetime import datetime, timedelta
import random from litellm import verbose_logger
import traceback import traceback
from litellm.caching import DualCache from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
@ -119,7 +117,12 @@ class LowestCostLoggingHandler(CustomLogger):
if self.test_flag: if self.test_flag:
self.logged_success += 1 self.logged_success += 1
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass pass
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@ -201,7 +204,12 @@ class LowestCostLoggingHandler(CustomLogger):
if self.test_flag: if self.test_flag:
self.logged_success += 1 self.logged_success += 1
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass pass
async def async_get_available_deployments( async def async_get_available_deployments(

View file

@ -1,16 +1,16 @@
#### What this does #### #### What this does ####
# picks based on response time (for streaming, this is time to first token) # picks based on response time (for streaming, this is time to first token)
from pydantic import BaseModel, Extra, Field, root_validator # type: ignore from pydantic import BaseModel
import dotenv, os, requests, random # type: ignore import random
from typing import Optional, Union, List, Dict from typing import Optional, Union, List, Dict
from datetime import datetime, timedelta from datetime import datetime, timedelta
import random
import traceback import traceback
from litellm.caching import DualCache from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm import ModelResponse from litellm import ModelResponse
from litellm import token_counter from litellm import token_counter
import litellm import litellm
from litellm import verbose_logger
class LiteLLMBase(BaseModel): class LiteLLMBase(BaseModel):
@ -165,7 +165,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
if self.test_flag: if self.test_flag:
self.logged_success += 1 self.logged_success += 1
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass pass
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
@ -229,7 +234,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
# do nothing if it's not a timeout error # do nothing if it's not a timeout error
return return
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass pass
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@ -352,7 +362,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
if self.test_flag: if self.test_flag:
self.logged_success += 1 self.logged_success += 1
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.router_strategy.lowest_latency.py::async_log_success_event(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass pass
def get_available_deployments( def get_available_deployments(

View file

@ -11,6 +11,7 @@ from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_router_logger from litellm._logging import verbose_router_logger
from litellm.utils import print_verbose from litellm.utils import print_verbose
class LiteLLMBase(BaseModel): class LiteLLMBase(BaseModel):
""" """
Implements default functions, all pydantic objects should have. Implements default functions, all pydantic objects should have.
@ -23,8 +24,10 @@ class LiteLLMBase(BaseModel):
# if using pydantic v1 # if using pydantic v1
return self.dict() return self.dict()
class RoutingArgs(LiteLLMBase): class RoutingArgs(LiteLLMBase):
ttl: int = 1 * 60 # 1min (RPM/TPM expire key) ttl: int = 1 * 60 # 1min (RPM/TPM expire key)
class LowestTPMLoggingHandler(CustomLogger): class LowestTPMLoggingHandler(CustomLogger):
test_flag: bool = False test_flag: bool = False
@ -32,7 +35,9 @@ class LowestTPMLoggingHandler(CustomLogger):
logged_failure: int = 0 logged_failure: int = 0
default_cache_time_seconds: int = 1 * 60 * 60 # 1 hour default_cache_time_seconds: int = 1 * 60 * 60 # 1 hour
def __init__(self, router_cache: DualCache, model_list: list, routing_args: dict = {}): def __init__(
self, router_cache: DualCache, model_list: list, routing_args: dict = {}
):
self.router_cache = router_cache self.router_cache = router_cache
self.model_list = model_list self.model_list = model_list
self.routing_args = RoutingArgs(**routing_args) self.routing_args = RoutingArgs(**routing_args)
@ -72,19 +77,28 @@ class LowestTPMLoggingHandler(CustomLogger):
request_count_dict = self.router_cache.get_cache(key=tpm_key) or {} request_count_dict = self.router_cache.get_cache(key=tpm_key) or {}
request_count_dict[id] = request_count_dict.get(id, 0) + total_tokens request_count_dict[id] = request_count_dict.get(id, 0) + total_tokens
self.router_cache.set_cache(key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl) self.router_cache.set_cache(
key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl
)
## RPM ## RPM
request_count_dict = self.router_cache.get_cache(key=rpm_key) or {} request_count_dict = self.router_cache.get_cache(key=rpm_key) or {}
request_count_dict[id] = request_count_dict.get(id, 0) + 1 request_count_dict[id] = request_count_dict.get(id, 0) + 1
self.router_cache.set_cache(key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl) self.router_cache.set_cache(
key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl
)
### TESTING ### ### TESTING ###
if self.test_flag: if self.test_flag:
self.logged_success += 1 self.logged_success += 1
except Exception as e: except Exception as e:
traceback.print_exc() verbose_router_logger.error(
"litellm.router_strategy.lowest_tpm_rpm.py::async_log_success_event(): Exception occured - {}".format(
str(e)
)
)
verbose_router_logger.debug(traceback.format_exc())
pass pass
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@ -123,19 +137,28 @@ class LowestTPMLoggingHandler(CustomLogger):
request_count_dict = self.router_cache.get_cache(key=tpm_key) or {} request_count_dict = self.router_cache.get_cache(key=tpm_key) or {}
request_count_dict[id] = request_count_dict.get(id, 0) + total_tokens request_count_dict[id] = request_count_dict.get(id, 0) + total_tokens
self.router_cache.set_cache(key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl) self.router_cache.set_cache(
key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl
)
## RPM ## RPM
request_count_dict = self.router_cache.get_cache(key=rpm_key) or {} request_count_dict = self.router_cache.get_cache(key=rpm_key) or {}
request_count_dict[id] = request_count_dict.get(id, 0) + 1 request_count_dict[id] = request_count_dict.get(id, 0) + 1
self.router_cache.set_cache(key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl) self.router_cache.set_cache(
key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl
)
### TESTING ### ### TESTING ###
if self.test_flag: if self.test_flag:
self.logged_success += 1 self.logged_success += 1
except Exception as e: except Exception as e:
traceback.print_exc() verbose_router_logger.error(
"litellm.router_strategy.lowest_tpm_rpm.py::async_log_success_event(): Exception occured - {}".format(
str(e)
)
)
verbose_router_logger.debug(traceback.format_exc())
pass pass
def get_available_deployments( def get_available_deployments(

View file

@ -1,19 +1,19 @@
#### What this does #### #### What this does ####
# identifies lowest tpm deployment # identifies lowest tpm deployment
from pydantic import BaseModel from pydantic import BaseModel
import dotenv, os, requests, random import random
from typing import Optional, Union, List, Dict from typing import Optional, Union, List, Dict
import datetime as datetime_og import traceback
from datetime import datetime import httpx
import traceback, asyncio, httpx
import litellm import litellm
from litellm import token_counter from litellm import token_counter
from litellm.caching import DualCache from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_router_logger from litellm._logging import verbose_router_logger, verbose_logger
from litellm.utils import print_verbose, get_utc_datetime from litellm.utils import print_verbose, get_utc_datetime
from litellm.types.router import RouterErrors from litellm.types.router import RouterErrors
class LiteLLMBase(BaseModel): class LiteLLMBase(BaseModel):
""" """
Implements default functions, all pydantic objects should have. Implements default functions, all pydantic objects should have.
@ -22,12 +22,14 @@ class LiteLLMBase(BaseModel):
def json(self, **kwargs): def json(self, **kwargs):
try: try:
return self.model_dump() # noqa return self.model_dump() # noqa
except: except Exception as e:
# if using pydantic v1 # if using pydantic v1
return self.dict() return self.dict()
class RoutingArgs(LiteLLMBase): class RoutingArgs(LiteLLMBase):
ttl: int = 1 * 60 # 1min (RPM/TPM expire key) ttl: int = 1 * 60 # 1min (RPM/TPM expire key)
class LowestTPMLoggingHandler_v2(CustomLogger): class LowestTPMLoggingHandler_v2(CustomLogger):
""" """
@ -47,7 +49,9 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
logged_failure: int = 0 logged_failure: int = 0
default_cache_time_seconds: int = 1 * 60 * 60 # 1 hour default_cache_time_seconds: int = 1 * 60 * 60 # 1 hour
def __init__(self, router_cache: DualCache, model_list: list, routing_args: dict = {}): def __init__(
self, router_cache: DualCache, model_list: list, routing_args: dict = {}
):
self.router_cache = router_cache self.router_cache = router_cache
self.model_list = model_list self.model_list = model_list
self.routing_args = RoutingArgs(**routing_args) self.routing_args = RoutingArgs(**routing_args)
@ -104,7 +108,9 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
) )
else: else:
# if local result below limit, check redis ## prevent unnecessary redis checks # if local result below limit, check redis ## prevent unnecessary redis checks
result = self.router_cache.increment_cache(key=rpm_key, value=1, ttl=self.routing_args.ttl) result = self.router_cache.increment_cache(
key=rpm_key, value=1, ttl=self.routing_args.ttl
)
if result is not None and result > deployment_rpm: if result is not None and result > deployment_rpm:
raise litellm.RateLimitError( raise litellm.RateLimitError(
message="Deployment over defined rpm limit={}. current usage={}".format( message="Deployment over defined rpm limit={}. current usage={}".format(
@ -244,12 +250,19 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
# update cache # update cache
## TPM ## TPM
self.router_cache.increment_cache(key=tpm_key, value=total_tokens, ttl=self.routing_args.ttl) self.router_cache.increment_cache(
key=tpm_key, value=total_tokens, ttl=self.routing_args.ttl
)
### TESTING ### ### TESTING ###
if self.test_flag: if self.test_flag:
self.logged_success += 1 self.logged_success += 1
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass pass
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@ -295,7 +308,12 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
if self.test_flag: if self.test_flag:
self.logged_success += 1 self.logged_success += 1
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass pass
def _common_checks_available_deployment( def _common_checks_available_deployment(
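
The router-strategy handlers above all inline the same two logging calls, which means the module::function string in each message has to be kept in sync with its call site by hand. A small shared helper would make that harder to get wrong; a sketch, assuming verbose_router_logger is a standard logging.Logger and using a hypothetical helper name:

import traceback

from litellm._logging import verbose_router_logger


def _log_strategy_exception(call_site: str, e: Exception) -> None:
    # one ERROR line naming the call site, full traceback at DEBUG
    verbose_router_logger.error("%s: Exception occurred - %s", call_site, str(e))
    verbose_router_logger.debug(traceback.format_exc())


# intended usage inside a handler:
# except Exception as e:
#     _log_strategy_exception(
#         "litellm.router_strategy.lowest_tpm_rpm_v2.py::log_success_event()", e
#     )
#     pass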

View file

@ -1372,8 +1372,12 @@ class Logging:
callback_func=callback, callback_func=callback,
) )
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
print_verbose( "litellm.Logging.pre_call(): Exception occurred - {}".format(
str(e)
)
)
verbose_logger.debug(
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}" f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}"
) )
print_verbose( print_verbose(
@ -10526,7 +10530,12 @@ class CustomStreamWrapper:
"finish_reason": finish_reason, "finish_reason": finish_reason,
} }
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.CustomStreamWrapper.handle_predibase_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e raise e
def handle_huggingface_chunk(self, chunk): def handle_huggingface_chunk(self, chunk):
@ -10570,7 +10579,12 @@ class CustomStreamWrapper:
"finish_reason": finish_reason, "finish_reason": finish_reason,
} }
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.CustomStreamWrapper.handle_huggingface_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e raise e
def handle_ai21_chunk(self, chunk): # fake streaming def handle_ai21_chunk(self, chunk): # fake streaming
@ -10805,7 +10819,12 @@ class CustomStreamWrapper:
"usage": usage, "usage": usage,
} }
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.CustomStreamWrapper.handle_openai_chat_completion_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e raise e
def handle_azure_text_completion_chunk(self, chunk): def handle_azure_text_completion_chunk(self, chunk):
@ -10886,7 +10905,12 @@ class CustomStreamWrapper:
else: else:
return "" return ""
except: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.CustomStreamWrapper.handle_baseten_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
return "" return ""
def handle_cloudlfare_stream(self, chunk): def handle_cloudlfare_stream(self, chunk):
@ -11085,7 +11109,12 @@ class CustomStreamWrapper:
"is_finished": True, "is_finished": True,
} }
except: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.CustomStreamWrapper.handle_clarifai_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
return "" return ""
def model_response_creator(self): def model_response_creator(self):
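
One detail the streaming chunk handlers rely on: the new error call formats str(e), so the enclosing except clause has to bind the exception (except Exception as e:); a bare except: would raise a NameError from inside the error handler itself. A standalone illustration with a hypothetical handler, assuming verbose_logger is litellm's standard logging.Logger:

import json
import traceback

from litellm._logging import verbose_logger


def handle_example_chunk(chunk: str) -> str:
    # hypothetical chunk handler, used only to show the except/logging shape
    try:
        return json.loads(chunk)["text"]
    except Exception as e:  # the exception must be bound for str(e) below
        verbose_logger.error(
            "litellm.CustomStreamWrapper.handle_example_chunk(): Exception occurred - {}".format(
                str(e)
            )
        )
        verbose_logger.debug(traceback.format_exc())
        return ""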
@ -11557,7 +11586,12 @@ class CustomStreamWrapper:
tool["type"] = "function" tool["type"] = "function"
model_response.choices[0].delta = Delta(**_json_delta) model_response.choices[0].delta = Delta(**_json_delta)
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.CustomStreamWrapper.chunk_creator(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
model_response.choices[0].delta = Delta() model_response.choices[0].delta = Delta()
else: else:
try: try: