From 6cca5612d2570e8de84e30f764302d60eef4bbe9 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 6 Jun 2024 13:47:43 -0700 Subject: [PATCH] refactor: replace 'traceback.print_exc()' with logging library allows error logs to be in json format for otel logging --- .../example_logging_api.py | 4 - .../generic_api_callback.py | 3 +- .../enterprise_hooks/banned_keywords.py | 2 +- .../enterprise_hooks/blocked_user_list.py | 2 +- enterprise/enterprise_hooks/llm_guard.py | 2 +- litellm/_logging.py | 1 + litellm/caching.py | 60 ++-- litellm/integrations/aispend.py | 1 - litellm/integrations/berrispend.py | 1 - litellm/integrations/clickhouse.py | 1 - litellm/integrations/custom_logger.py | 4 - litellm/integrations/datadog.py | 1 - litellm/integrations/dynamodb.py | 1 - litellm/integrations/helicone.py | 1 - litellm/integrations/langfuse.py | 22 +- litellm/integrations/langsmith.py | 9 +- litellm/integrations/logfire_logger.py | 1 - litellm/integrations/lunary.py | 8 +- litellm/integrations/prometheus.py | 6 +- litellm/integrations/s3.py | 1 - litellm/integrations/supabase.py | 1 - litellm/integrations/weights_biases.py | 1 - litellm/llms/gemini.py | 17 +- litellm/llms/ollama.py | 19 +- litellm/llms/ollama_chat.py | 56 +++- litellm/llms/palm.py | 16 +- litellm/main.py | 15 +- litellm/proxy/_logging.py | 1 + litellm/proxy/hooks/azure_content_safety.py | 9 +- litellm/proxy/hooks/batch_redis_get.py | 7 +- litellm/proxy/hooks/cache_control_check.py | 11 +- litellm/proxy/hooks/max_budget_limiter.py | 11 +- litellm/proxy/hooks/presidio_pii_masking.py | 15 +- .../proxy/hooks/prompt_injection_detection.py | 7 +- litellm/proxy/proxy_server.py | 266 ++++++++++++++---- litellm/router.py | 4 +- litellm/router_strategy/lowest_cost.py | 20 +- litellm/router_strategy/lowest_latency.py | 27 +- litellm/router_strategy/lowest_tpm_rpm.py | 41 ++- litellm/router_strategy/lowest_tpm_rpm_v2.py | 42 ++- litellm/utils.py | 50 +++- 41 files changed, 542 insertions(+), 225 deletions(-) diff --git a/enterprise/enterprise_callbacks/example_logging_api.py b/enterprise/enterprise_callbacks/example_logging_api.py index 57ea99a674..c3d3f5e63f 100644 --- a/enterprise/enterprise_callbacks/example_logging_api.py +++ b/enterprise/enterprise_callbacks/example_logging_api.py @@ -18,10 +18,6 @@ async def log_event(request: Request): return {"message": "Request received successfully"} except Exception as e: - print(f"Error processing request: {str(e)}") - import traceback - - traceback.print_exc() raise HTTPException(status_code=500, detail="Internal Server Error") diff --git a/enterprise/enterprise_callbacks/generic_api_callback.py b/enterprise/enterprise_callbacks/generic_api_callback.py index cf1d22e8f8..ba189b149c 100644 --- a/enterprise/enterprise_callbacks/generic_api_callback.py +++ b/enterprise/enterprise_callbacks/generic_api_callback.py @@ -120,6 +120,5 @@ class GenericAPILogger: ) return response except Exception as e: - traceback.print_exc() - verbose_logger.debug(f"Generic - {str(e)}\n{traceback.format_exc()}") + verbose_logger.error(f"Generic - {str(e)}\n{traceback.format_exc()}") pass diff --git a/enterprise/enterprise_hooks/banned_keywords.py b/enterprise/enterprise_hooks/banned_keywords.py index acd390d798..4cf68b2fd9 100644 --- a/enterprise/enterprise_hooks/banned_keywords.py +++ b/enterprise/enterprise_hooks/banned_keywords.py @@ -82,7 +82,7 @@ class _ENTERPRISE_BannedKeywords(CustomLogger): except HTTPException as e: raise e except Exception as e: - traceback.print_exc() + 
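The commit message above states the goal (route errors through the logging library so they can be rendered as JSON for OTEL), and every hunk in this patch applies the same mechanical substitution. A minimal sketch of that substitution follows, using the stdlib `logging` module as a stand-in for litellm's `verbose_logger`/`verbose_proxy_logger`; the logger name and the failing helper are assumptions for illustration, not litellm code. The stdlib `logger.exception(...)` / `exc_info=True` would attach the traceback to the same record, whereas the hunks here consistently log the message at ERROR and the traceback separately at DEBUG.

```python
import logging
import traceback

# Stand-in for litellm's module-level loggers (verbose_logger,
# verbose_proxy_logger, verbose_router_logger); the logger name used
# here is an assumption for this sketch only.
verbose_logger = logging.getLogger("LiteLLM")


def fetch_something():
    # Hypothetical helper that fails, just to exercise the except blocks.
    raise ValueError("boom")


def before_refactor():
    try:
        fetch_something()
    except Exception:
        # Old pattern: writes the stack trace directly to stderr, bypassing
        # all logging handlers and formatters (so it can never be JSON).
        traceback.print_exc()


def after_refactor():
    try:
        fetch_something()
    except Exception as e:
        # New pattern applied throughout the patch: a one-line message at
        # ERROR, and the full traceback only at DEBUG verbosity.
        verbose_logger.error(
            "litellm.some_module.some_fn(): Exception occurred - {}".format(str(e))
        )
        verbose_logger.debug(traceback.format_exc())
```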
verbose_proxy_logger.error(traceback.format_exc()) async def async_post_call_success_hook( self, diff --git a/enterprise/enterprise_hooks/blocked_user_list.py b/enterprise/enterprise_hooks/blocked_user_list.py index cbc14d2c2b..8e642a026f 100644 --- a/enterprise/enterprise_hooks/blocked_user_list.py +++ b/enterprise/enterprise_hooks/blocked_user_list.py @@ -118,4 +118,4 @@ class _ENTERPRISE_BlockedUserList(CustomLogger): except HTTPException as e: raise e except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error(traceback.format_exc()) diff --git a/enterprise/enterprise_hooks/llm_guard.py b/enterprise/enterprise_hooks/llm_guard.py index 3a15ca52b9..9db10cf79c 100644 --- a/enterprise/enterprise_hooks/llm_guard.py +++ b/enterprise/enterprise_hooks/llm_guard.py @@ -92,7 +92,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger): }, ) except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error(traceback.format_exc()) raise e def should_proceed(self, user_api_key_dict: UserAPIKeyAuth, data: dict) -> bool: diff --git a/litellm/_logging.py b/litellm/_logging.py index 1ff6e45ddb..ab7a08f976 100644 --- a/litellm/_logging.py +++ b/litellm/_logging.py @@ -1,5 +1,6 @@ import logging, os, json from logging import Formatter +import traceback set_verbose = False json_logs = bool(os.getenv("JSON_LOGS", False)) diff --git a/litellm/caching.py b/litellm/caching.py index c8c1736d86..d1f3387ee4 100644 --- a/litellm/caching.py +++ b/litellm/caching.py @@ -253,7 +253,6 @@ class RedisCache(BaseCache): str(e), value, ) - traceback.print_exc() raise e async def async_scan_iter(self, pattern: str, count: int = 100) -> list: @@ -313,7 +312,6 @@ class RedisCache(BaseCache): str(e), value, ) - traceback.print_exc() key = self.check_and_fix_namespace(key=key) async with _redis_client as redis_client: @@ -352,7 +350,6 @@ class RedisCache(BaseCache): str(e), value, ) - traceback.print_exc() async def async_set_cache_pipeline(self, cache_list, ttl=None): """ @@ -413,7 +410,6 @@ class RedisCache(BaseCache): str(e), cache_value, ) - traceback.print_exc() async def batch_cache_write(self, key, value, **kwargs): print_verbose( @@ -458,7 +454,6 @@ class RedisCache(BaseCache): str(e), value, ) - traceback.print_exc() raise e async def flush_cache_buffer(self): @@ -495,8 +490,9 @@ class RedisCache(BaseCache): return self._get_cache_logic(cached_response=cached_response) except Exception as e: # NON blocking - notify users Redis is throwing an exception - traceback.print_exc() - logging.debug("LiteLLM Caching: get() - Got exception from REDIS: ", e) + verbose_logger.error( + "LiteLLM Caching: get() - Got exception from REDIS: ", e + ) def batch_get_cache(self, key_list) -> dict: """ @@ -646,10 +642,9 @@ class RedisCache(BaseCache): error=e, call_type="sync_ping", ) - print_verbose( + verbose_logger.error( f"LiteLLM Redis Cache PING: - Got exception from REDIS : {str(e)}" ) - traceback.print_exc() raise e async def ping(self) -> bool: @@ -683,10 +678,9 @@ class RedisCache(BaseCache): call_type="async_ping", ) ) - print_verbose( + verbose_logger.error( f"LiteLLM Redis Cache PING: - Got exception from REDIS : {str(e)}" ) - traceback.print_exc() raise e async def delete_cache_keys(self, keys): @@ -1138,22 +1132,23 @@ class S3Cache(BaseCache): cached_response = ast.literal_eval(cached_response) if type(cached_response) is not dict: cached_response = dict(cached_response) - print_verbose( + verbose_logger.debug( f"Got S3 Cache: key: {key}, cached_response {cached_response}. 
Type Response {type(cached_response)}" ) return cached_response except botocore.exceptions.ClientError as e: if e.response["Error"]["Code"] == "NoSuchKey": - print_verbose( + verbose_logger.error( f"S3 Cache: The specified key '{key}' does not exist in the S3 bucket." ) return None except Exception as e: # NON blocking - notify users S3 is throwing an exception - traceback.print_exc() - print_verbose(f"S3 Caching: get_cache() - Got exception from S3: {e}") + verbose_logger.error( + f"S3 Caching: get_cache() - Got exception from S3: {e}" + ) async def async_get_cache(self, key, **kwargs): return self.get_cache(key=key, **kwargs) @@ -1234,8 +1229,7 @@ class DualCache(BaseCache): return result except Exception as e: - print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}") - traceback.print_exc() + verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}") raise e def get_cache(self, key, local_only: bool = False, **kwargs): @@ -1262,7 +1256,7 @@ class DualCache(BaseCache): print_verbose(f"get cache: cache result: {result}") return result except Exception as e: - traceback.print_exc() + verbose_logger.error(traceback.format_exc()) def batch_get_cache(self, keys: list, local_only: bool = False, **kwargs): try: @@ -1295,7 +1289,7 @@ class DualCache(BaseCache): print_verbose(f"async batch get cache: cache result: {result}") return result except Exception as e: - traceback.print_exc() + verbose_logger.error(traceback.format_exc()) async def async_get_cache(self, key, local_only: bool = False, **kwargs): # Try to fetch from in-memory cache first @@ -1328,7 +1322,7 @@ class DualCache(BaseCache): print_verbose(f"get cache: cache result: {result}") return result except Exception as e: - traceback.print_exc() + verbose_logger.error(traceback.format_exc()) async def async_batch_get_cache( self, keys: list, local_only: bool = False, **kwargs @@ -1368,7 +1362,7 @@ class DualCache(BaseCache): return result except Exception as e: - traceback.print_exc() + verbose_logger.error(traceback.format_exc()) async def async_set_cache(self, key, value, local_only: bool = False, **kwargs): print_verbose( @@ -1381,8 +1375,8 @@ class DualCache(BaseCache): if self.redis_cache is not None and local_only == False: await self.redis_cache.async_set_cache(key, value, **kwargs) except Exception as e: - print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}") - traceback.print_exc() + verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}") + verbose_logger.debug(traceback.format_exc()) async def async_batch_set_cache( self, cache_list: list, local_only: bool = False, **kwargs @@ -1404,8 +1398,8 @@ class DualCache(BaseCache): cache_list=cache_list, ttl=kwargs.get("ttl", None) ) except Exception as e: - print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}") - traceback.print_exc() + verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}") + verbose_logger.debug(traceback.format_exc()) async def async_increment_cache( self, key, value: float, local_only: bool = False, **kwargs @@ -1429,8 +1423,8 @@ class DualCache(BaseCache): return result except Exception as e: - print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}") - traceback.print_exc() + verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}") + verbose_logger.debug(traceback.format_exc()) raise e def flush_cache(self): @@ -1846,8 +1840,8 @@ class Cache: ) self.cache.set_cache(cache_key, cached_data, **kwargs) except Exception as e: - 
print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}") - traceback.print_exc() + verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}") + verbose_logger.debug(traceback.format_exc()) pass async def async_add_cache(self, result, *args, **kwargs): @@ -1864,8 +1858,8 @@ class Cache: ) await self.cache.async_set_cache(cache_key, cached_data, **kwargs) except Exception as e: - print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}") - traceback.print_exc() + verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}") + verbose_logger.debug(traceback.format_exc()) async def async_add_cache_pipeline(self, result, *args, **kwargs): """ @@ -1897,8 +1891,8 @@ class Cache: ) await asyncio.gather(*tasks) except Exception as e: - print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}") - traceback.print_exc() + verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}") + verbose_logger.debug(traceback.format_exc()) async def batch_cache_write(self, result, *args, **kwargs): cache_key, cached_data, kwargs = self._add_cache_logic( diff --git a/litellm/integrations/aispend.py b/litellm/integrations/aispend.py index 2fe8ea0dfa..ca284e62e5 100644 --- a/litellm/integrations/aispend.py +++ b/litellm/integrations/aispend.py @@ -169,6 +169,5 @@ class AISpendLogger: print_verbose(f"AISpend Logging - final data object: {data}") except: - # traceback.print_exc() print_verbose(f"AISpend Logging Error - {traceback.format_exc()}") pass diff --git a/litellm/integrations/berrispend.py b/litellm/integrations/berrispend.py index 7d30b706c8..d428fb54d8 100644 --- a/litellm/integrations/berrispend.py +++ b/litellm/integrations/berrispend.py @@ -178,6 +178,5 @@ class BerriSpendLogger: print_verbose(f"BerriSpend Logging - final data object: {data}") response = requests.post(url, headers=headers, json=data) except: - # traceback.print_exc() print_verbose(f"BerriSpend Logging Error - {traceback.format_exc()}") pass diff --git a/litellm/integrations/clickhouse.py b/litellm/integrations/clickhouse.py index 0c38b86267..f8b6b1bbf0 100644 --- a/litellm/integrations/clickhouse.py +++ b/litellm/integrations/clickhouse.py @@ -297,6 +297,5 @@ class ClickhouseLogger: # make request to endpoint with payload verbose_logger.debug(f"Clickhouse Logger - final response = {response}") except Exception as e: - traceback.print_exc() verbose_logger.debug(f"Clickhouse - {str(e)}\n{traceback.format_exc()}") pass diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index e192cdaea7..1d447da1f2 100644 --- a/litellm/integrations/custom_logger.py +++ b/litellm/integrations/custom_logger.py @@ -115,7 +115,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac ) print_verbose(f"Custom Logger - model call details: {kwargs}") except: - traceback.print_exc() print_verbose(f"Custom Logger Error - {traceback.format_exc()}") async def async_log_input_event( @@ -130,7 +129,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac ) print_verbose(f"Custom Logger - model call details: {kwargs}") except: - traceback.print_exc() print_verbose(f"Custom Logger Error - {traceback.format_exc()}") def log_event( @@ -146,7 +144,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac end_time, ) except: - # traceback.print_exc() print_verbose(f"Custom Logger Error - {traceback.format_exc()}") pass @@ -163,6 +160,5 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac 
end_time, ) except: - # traceback.print_exc() print_verbose(f"Custom Logger Error - {traceback.format_exc()}") pass diff --git a/litellm/integrations/datadog.py b/litellm/integrations/datadog.py index 6d5e08faff..d835b3d670 100644 --- a/litellm/integrations/datadog.py +++ b/litellm/integrations/datadog.py @@ -134,7 +134,6 @@ class DataDogLogger: f"Datadog Layer Logging - final response object: {response_obj}" ) except Exception as e: - traceback.print_exc() verbose_logger.debug( f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}" ) diff --git a/litellm/integrations/dynamodb.py b/litellm/integrations/dynamodb.py index 21ccabe4b7..847f930ece 100644 --- a/litellm/integrations/dynamodb.py +++ b/litellm/integrations/dynamodb.py @@ -85,6 +85,5 @@ class DyanmoDBLogger: ) return response except: - traceback.print_exc() print_verbose(f"DynamoDB Layer Error - {traceback.format_exc()}") pass diff --git a/litellm/integrations/helicone.py b/litellm/integrations/helicone.py index 85e73258ea..8ea18a7d5b 100644 --- a/litellm/integrations/helicone.py +++ b/litellm/integrations/helicone.py @@ -112,6 +112,5 @@ class HeliconeLogger: ) print_verbose(f"Helicone Logging - Error {response.text}") except: - # traceback.print_exc() print_verbose(f"Helicone Logging Error - {traceback.format_exc()}") pass diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py index 4218e2dc52..acdde4438a 100644 --- a/litellm/integrations/langfuse.py +++ b/litellm/integrations/langfuse.py @@ -72,21 +72,27 @@ class LangFuseLogger: @staticmethod def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict: """ - Adds metadata from proxy request headers to Langfuse logging if keys start with "langfuse_" + Adds metadata from proxy request headers to Langfuse logging if keys start with "langfuse_" and overwrites litellm_params.metadata if already included. For example if you want to append your trace to an existing `trace_id` via header, send `headers: { ..., langfuse_existing_trace_id: your-existing-trace-id }` via proxy request. 
""" - proxy_headers = litellm_params.get("proxy_server_request", {}).get("headers", {}) + proxy_headers = litellm_params.get("proxy_server_request", {}).get( + "headers", {} + ) for metadata_param_key in proxy_headers: if metadata_param_key.startswith("langfuse_"): trace_param_key = metadata_param_key.replace("langfuse_", "", 1) if trace_param_key in metadata: - verbose_logger.warning(f"Overwriting Langfuse `{trace_param_key}` from request header") + verbose_logger.warning( + f"Overwriting Langfuse `{trace_param_key}` from request header" + ) else: - verbose_logger.debug(f"Found Langfuse `{trace_param_key}` in request header") + verbose_logger.debug( + f"Found Langfuse `{trace_param_key}` in request header" + ) metadata[trace_param_key] = proxy_headers.get(metadata_param_key) return metadata @@ -205,9 +211,11 @@ class LangFuseLogger: verbose_logger.info(f"Langfuse Layer Logging - logging success") return {"trace_id": trace_id, "generation_id": generation_id} - except: - traceback.print_exc() - verbose_logger.debug(f"Langfuse Layer Error - {traceback.format_exc()}") + except Exception as e: + verbose_logger.error( + "Langfuse Layer Error(): Exception occured - {}".format(str(e)) + ) + verbose_logger.debug(traceback.format_exc()) return {"trace_id": None, "generation_id": None} async def _async_log_event( diff --git a/litellm/integrations/langsmith.py b/litellm/integrations/langsmith.py index 3e25b4ee77..48185afeea 100644 --- a/litellm/integrations/langsmith.py +++ b/litellm/integrations/langsmith.py @@ -44,7 +44,9 @@ class LangsmithLogger: print_verbose( f"Langsmith Logging - project_name: {project_name}, run_name {run_name}" ) - langsmith_base_url = os.getenv("LANGSMITH_BASE_URL", "https://api.smith.langchain.com") + langsmith_base_url = os.getenv( + "LANGSMITH_BASE_URL", "https://api.smith.langchain.com" + ) try: print_verbose( @@ -89,9 +91,7 @@ class LangsmithLogger: } url = f"{langsmith_base_url}/runs" - print_verbose( - f"Langsmith Logging - About to send data to {url} ..." 
- ) + print_verbose(f"Langsmith Logging - About to send data to {url} ...") response = requests.post( url=url, json=data, @@ -106,6 +106,5 @@ class LangsmithLogger: f"Langsmith Layer Logging - final response object: {response_obj}" ) except: - # traceback.print_exc() print_verbose(f"Langsmith Layer Error - {traceback.format_exc()}") pass diff --git a/litellm/integrations/logfire_logger.py b/litellm/integrations/logfire_logger.py index e27d848fb4..b4ab00820e 100644 --- a/litellm/integrations/logfire_logger.py +++ b/litellm/integrations/logfire_logger.py @@ -171,7 +171,6 @@ class LogfireLogger: f"Logfire Layer Logging - final response object: {response_obj}" ) except Exception as e: - traceback.print_exc() verbose_logger.debug( f"Logfire Layer Error - {str(e)}\n{traceback.format_exc()}" ) diff --git a/litellm/integrations/lunary.py b/litellm/integrations/lunary.py index 2e16e44a14..141ea64884 100644 --- a/litellm/integrations/lunary.py +++ b/litellm/integrations/lunary.py @@ -14,6 +14,7 @@ def parse_usage(usage): "prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0, } + def parse_tool_calls(tool_calls): if tool_calls is None: return None @@ -26,13 +27,13 @@ def parse_tool_calls(tool_calls): "function": { "name": tool_call.function.name, "arguments": tool_call.function.arguments, - } + }, } return serialized - + return [clean_tool_call(tool_call) for tool_call in tool_calls] - + def parse_messages(input): @@ -176,6 +177,5 @@ class LunaryLogger: ) except: - # traceback.print_exc() print_verbose(f"Lunary Logging Error - {traceback.format_exc()}") pass diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index 6fbc6ca4ce..af0d1d310b 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -109,8 +109,8 @@ class PrometheusLogger: end_user_id, user_api_key, model, user_api_team, user_id ).inc() except Exception as e: - traceback.print_exc() - verbose_logger.debug( - f"prometheus Layer Error - {str(e)}\n{traceback.format_exc()}" + verbose_logger.error( + "prometheus Layer Error(): Exception occured - {}".format(str(e)) ) + verbose_logger.debug(traceback.format_exc()) pass diff --git a/litellm/integrations/s3.py b/litellm/integrations/s3.py index d131e44f0e..0796d1048b 100644 --- a/litellm/integrations/s3.py +++ b/litellm/integrations/s3.py @@ -180,6 +180,5 @@ class S3Logger: print_verbose(f"s3 Layer Logging - final response object: {response_obj}") return response except Exception as e: - traceback.print_exc() verbose_logger.debug(f"s3 Layer Error - {str(e)}\n{traceback.format_exc()}") pass diff --git a/litellm/integrations/supabase.py b/litellm/integrations/supabase.py index 4e6bf517f3..7309342e4c 100644 --- a/litellm/integrations/supabase.py +++ b/litellm/integrations/supabase.py @@ -110,6 +110,5 @@ class Supabase: ) except: - # traceback.print_exc() print_verbose(f"Supabase Logging Error - {traceback.format_exc()}") pass diff --git a/litellm/integrations/weights_biases.py b/litellm/integrations/weights_biases.py index a56233b22f..1ac535c4f2 100644 --- a/litellm/integrations/weights_biases.py +++ b/litellm/integrations/weights_biases.py @@ -217,6 +217,5 @@ class WeightsBiasesLogger: f"W&B Logging Logging - final response object: {response_obj}" ) except: - # traceback.print_exc() print_verbose(f"W&B Logging Layer Error - {traceback.format_exc()}") pass diff --git a/litellm/llms/gemini.py b/litellm/llms/gemini.py index a55b39aef9..cfdf39eca2 100644 --- a/litellm/llms/gemini.py +++ b/litellm/llms/gemini.py @@ -1,13 
+1,14 @@ -import os, types, traceback, copy, asyncio -import json -from enum import Enum +import types +import traceback +import copy import time from typing import Callable, Optional -from litellm.utils import ModelResponse, get_secret, Choices, Message, Usage +from litellm.utils import ModelResponse, Choices, Message, Usage import litellm -import sys, httpx +import httpx from .prompt_templates.factory import prompt_factory, custom_prompt, get_system_prompt from packaging.version import Version +from litellm import verbose_logger class GeminiError(Exception): @@ -264,7 +265,8 @@ def completion( choices_list.append(choice_obj) model_response["choices"] = choices_list except Exception as e: - traceback.print_exc() + verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e))) + verbose_logger.debug(traceback.format_exc()) raise GeminiError( message=traceback.format_exc(), status_code=response.status_code ) @@ -356,7 +358,8 @@ async def async_completion( choices_list.append(choice_obj) model_response["choices"] = choices_list except Exception as e: - traceback.print_exc() + verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e))) + verbose_logger.debug(traceback.format_exc()) raise GeminiError( message=traceback.format_exc(), status_code=response.status_code ) diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py index 283878056f..bc2740534f 100644 --- a/litellm/llms/ollama.py +++ b/litellm/llms/ollama.py @@ -6,6 +6,7 @@ from typing import Optional import litellm import httpx, aiohttp, asyncio # type: ignore from .prompt_templates.factory import prompt_factory, custom_prompt +from litellm import verbose_logger class OllamaError(Exception): @@ -124,6 +125,7 @@ class OllamaConfig: ) and v is not None } + def get_supported_openai_params( self, ): @@ -138,10 +140,12 @@ class OllamaConfig: "response_format", ] + # ollama wants plain base64 jpeg/png files as images. strip any leading dataURI # and convert to jpeg if necessary. 
def _convert_image(image): import base64, io + try: from PIL import Image except: @@ -391,7 +395,13 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob async for transformed_chunk in streamwrapper: yield transformed_chunk except Exception as e: - traceback.print_exc() + verbose_logger.error( + "LiteLLM.ollama.py::ollama_async_streaming(): Exception occured - {}".format( + str(e) + ) + ) + verbose_logger.debug(traceback.format_exc()) + raise e @@ -455,7 +465,12 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj): ) return model_response except Exception as e: - traceback.print_exc() + verbose_logger.error( + "LiteLLM.ollama.py::ollama_acompletion(): Exception occured - {}".format( + str(e) + ) + ) + verbose_logger.debug(traceback.format_exc()) raise e diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py index a058077227..a7439bbcc0 100644 --- a/litellm/llms/ollama_chat.py +++ b/litellm/llms/ollama_chat.py @@ -1,11 +1,15 @@ from itertools import chain -import requests, types, time -import json, uuid +import requests +import types +import time +import json +import uuid import traceback from typing import Optional +from litellm import verbose_logger import litellm -import httpx, aiohttp, asyncio -from .prompt_templates.factory import prompt_factory, custom_prompt +import httpx +import aiohttp class OllamaError(Exception): @@ -299,7 +303,10 @@ def get_ollama_response( tool_calls=[ { "id": f"call_{str(uuid.uuid4())}", - "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])}, + "function": { + "name": function_call["name"], + "arguments": json.dumps(function_call["arguments"]), + }, "type": "function", } ], @@ -307,7 +314,9 @@ def get_ollama_response( model_response["choices"][0]["message"] = message model_response["choices"][0]["finish_reason"] = "tool_calls" else: - model_response["choices"][0]["message"]["content"] = response_json["message"]["content"] + model_response["choices"][0]["message"]["content"] = response_json["message"][ + "content" + ] model_response["created"] = int(time.time()) model_response["model"] = "ollama/" + model prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=messages)) # type: ignore @@ -361,7 +370,10 @@ def ollama_completion_stream(url, api_key, data, logging_obj): tool_calls=[ { "id": f"call_{str(uuid.uuid4())}", - "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])}, + "function": { + "name": function_call["name"], + "arguments": json.dumps(function_call["arguments"]), + }, "type": "function", } ], @@ -410,9 +422,10 @@ async def ollama_async_streaming( first_chunk_content = first_chunk.choices[0].delta.content or "" response_content = first_chunk_content + "".join( [ - chunk.choices[0].delta.content - async for chunk in streamwrapper - if chunk.choices[0].delta.content] + chunk.choices[0].delta.content + async for chunk in streamwrapper + if chunk.choices[0].delta.content + ] ) function_call = json.loads(response_content) delta = litellm.utils.Delta( @@ -420,7 +433,10 @@ async def ollama_async_streaming( tool_calls=[ { "id": f"call_{str(uuid.uuid4())}", - "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])}, + "function": { + "name": function_call["name"], + "arguments": json.dumps(function_call["arguments"]), + }, "type": "function", } ], @@ -433,7 +449,8 @@ async def ollama_async_streaming( async for transformed_chunk in 
streamwrapper: yield transformed_chunk except Exception as e: - traceback.print_exc() + verbose_logger.error("LiteLLM.gemini(): Exception occured - {}".format(str(e))) + verbose_logger.debug(traceback.format_exc()) async def ollama_acompletion( @@ -483,7 +500,10 @@ async def ollama_acompletion( tool_calls=[ { "id": f"call_{str(uuid.uuid4())}", - "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])}, + "function": { + "name": function_call["name"], + "arguments": json.dumps(function_call["arguments"]), + }, "type": "function", } ], @@ -491,7 +511,9 @@ async def ollama_acompletion( model_response["choices"][0]["message"] = message model_response["choices"][0]["finish_reason"] = "tool_calls" else: - model_response["choices"][0]["message"]["content"] = response_json["message"]["content"] + model_response["choices"][0]["message"]["content"] = response_json[ + "message" + ]["content"] model_response["created"] = int(time.time()) model_response["model"] = "ollama_chat/" + data["model"] @@ -509,5 +531,9 @@ async def ollama_acompletion( ) return model_response except Exception as e: - traceback.print_exc() + verbose_logger.error( + "LiteLLM.ollama_acompletion(): Exception occured - {}".format(str(e)) + ) + verbose_logger.debug(traceback.format_exc()) + raise e diff --git a/litellm/llms/palm.py b/litellm/llms/palm.py index f15be43db4..4d9953e77a 100644 --- a/litellm/llms/palm.py +++ b/litellm/llms/palm.py @@ -1,11 +1,12 @@ -import os, types, traceback, copy -import json -from enum import Enum +import types +import traceback +import copy import time from typing import Callable, Optional -from litellm.utils import ModelResponse, get_secret, Choices, Message, Usage +from litellm.utils import ModelResponse, Choices, Message, Usage import litellm -import sys, httpx +import httpx +from litellm import verbose_logger class PalmError(Exception): @@ -165,7 +166,10 @@ def completion( choices_list.append(choice_obj) model_response["choices"] = choices_list except Exception as e: - traceback.print_exc() + verbose_logger.error( + "litellm.llms.palm.py::completion(): Exception occured - {}".format(str(e)) + ) + verbose_logger.debug(traceback.format_exc()) raise PalmError( message=traceback.format_exc(), status_code=response.status_code ) diff --git a/litellm/main.py b/litellm/main.py index f76d6c5213..65022edb0d 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -364,7 +364,10 @@ async def acompletion( ) # sets the logging event loop if the user does sync streaming (e.g. on proxy for sagemaker calls) return response except Exception as e: - traceback.print_exc() + verbose_logger.error( + "litellm.acompletion(): Exception occured - {}".format(str(e)) + ) + verbose_logger.debug(traceback.format_exc()) custom_llm_provider = custom_llm_provider or "openai" raise exception_type( model=model, @@ -477,7 +480,10 @@ def mock_completion( except Exception as e: if isinstance(e, openai.APIError): raise e - traceback.print_exc() + verbose_logger.error( + "litellm.mock_completion(): Exception occured - {}".format(str(e)) + ) + verbose_logger.debug(traceback.format_exc()) raise Exception("Mock completion response failed") @@ -4430,7 +4436,10 @@ async def ahealth_check( response = {} # args like remaining ratelimit etc. 
return response except Exception as e: - traceback.print_exc() + verbose_logger.error( + "litellm.ahealth_check(): Exception occured - {}".format(str(e)) + ) + verbose_logger.debug(traceback.format_exc()) stack_trace = traceback.format_exc() if isinstance(stack_trace, str): stack_trace = stack_trace[:1000] diff --git a/litellm/proxy/_logging.py b/litellm/proxy/_logging.py index 22cbd88cb7..f453cef395 100644 --- a/litellm/proxy/_logging.py +++ b/litellm/proxy/_logging.py @@ -1,6 +1,7 @@ import json import logging from logging import Formatter +import sys class JsonFormatter(Formatter): diff --git a/litellm/proxy/hooks/azure_content_safety.py b/litellm/proxy/hooks/azure_content_safety.py index 5b5139f8c7..47ba36a683 100644 --- a/litellm/proxy/hooks/azure_content_safety.py +++ b/litellm/proxy/hooks/azure_content_safety.py @@ -88,7 +88,7 @@ class _PROXY_AzureContentSafety( verbose_proxy_logger.debug( "Error in Azure Content-Safety: %s", traceback.format_exc() ) - traceback.print_exc() + verbose_proxy_logger.debug(traceback.format_exc()) raise result = self._compute_result(response) @@ -123,7 +123,12 @@ class _PROXY_AzureContentSafety( except HTTPException as e: raise e except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.hooks.azure_content_safety.py::async_pre_call_hook(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) async def async_post_call_success_hook( self, diff --git a/litellm/proxy/hooks/batch_redis_get.py b/litellm/proxy/hooks/batch_redis_get.py index 64541c1bff..d506109b81 100644 --- a/litellm/proxy/hooks/batch_redis_get.py +++ b/litellm/proxy/hooks/batch_redis_get.py @@ -94,7 +94,12 @@ class _PROXY_BatchRedisRequests(CustomLogger): except HTTPException as e: raise e except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.hooks.batch_redis_get.py::async_pre_call_hook(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) async def async_get_cache(self, *args, **kwargs): """ diff --git a/litellm/proxy/hooks/cache_control_check.py b/litellm/proxy/hooks/cache_control_check.py index 3160fe97ad..89971a0bf7 100644 --- a/litellm/proxy/hooks/cache_control_check.py +++ b/litellm/proxy/hooks/cache_control_check.py @@ -1,13 +1,13 @@ # What this does? 
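The reason this matters for OTEL, per the commit message, is that `traceback.print_exc()` output goes straight to stderr and never passes through logging handlers or formatters. The patch context shows a `JsonFormatter` in `litellm/proxy/_logging.py` and a `JSON_LOGS` env flag in `litellm/_logging.py`; the formatter below is only an illustrative sketch of that idea (not litellm's own class), showing how logger-routed errors become one structured JSON line per record.

```python
import json
import logging
from logging import Formatter


class JsonFormatter(Formatter):
    # Illustrative formatter only; in litellm the equivalent role is played
    # by its own JsonFormatter (litellm/proxy/_logging.py) gated by JSON_LOGS.
    def format(self, record: logging.LogRecord) -> str:
        payload = {
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
        }
        if record.exc_info:
            payload["exc_info"] = self.formatException(record.exc_info)
        return json.dumps(payload)


handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter())

verbose_logger = logging.getLogger("LiteLLM")  # logger name is an assumption
verbose_logger.addHandler(handler)
verbose_logger.setLevel(logging.DEBUG)

# Each verbose_logger.error(...) call introduced by the patch now becomes one
# JSON line that an OTEL collector can parse; traceback.print_exc() output
# never reached this formatter at all.
verbose_logger.error("litellm.caching.py: Exception occurred - example")
```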
## Checks if key is allowed to use the cache controls passed in to the completion() call -from typing import Optional import litellm +from litellm import verbose_logger from litellm.caching import DualCache from litellm.proxy._types import UserAPIKeyAuth from litellm.integrations.custom_logger import CustomLogger from fastapi import HTTPException -import json, traceback +import traceback class _PROXY_CacheControlCheck(CustomLogger): @@ -54,4 +54,9 @@ class _PROXY_CacheControlCheck(CustomLogger): except HTTPException as e: raise e except Exception as e: - traceback.print_exc() + verbose_logger.error( + "litellm.proxy.hooks.cache_control_check.py::async_pre_call_hook(): Exception occured - {}".format( + str(e) + ) + ) + verbose_logger.debug(traceback.format_exc()) diff --git a/litellm/proxy/hooks/max_budget_limiter.py b/litellm/proxy/hooks/max_budget_limiter.py index 442cc53e37..c4b328bab0 100644 --- a/litellm/proxy/hooks/max_budget_limiter.py +++ b/litellm/proxy/hooks/max_budget_limiter.py @@ -1,10 +1,10 @@ -from typing import Optional +from litellm import verbose_logger import litellm from litellm.caching import DualCache from litellm.proxy._types import UserAPIKeyAuth from litellm.integrations.custom_logger import CustomLogger from fastapi import HTTPException -import json, traceback +import traceback class _PROXY_MaxBudgetLimiter(CustomLogger): @@ -44,4 +44,9 @@ class _PROXY_MaxBudgetLimiter(CustomLogger): except HTTPException as e: raise e except Exception as e: - traceback.print_exc() + verbose_logger.error( + "litellm.proxy.hooks.max_budget_limiter.py::async_pre_call_hook(): Exception occured - {}".format( + str(e) + ) + ) + verbose_logger.debug(traceback.format_exc()) diff --git a/litellm/proxy/hooks/presidio_pii_masking.py b/litellm/proxy/hooks/presidio_pii_masking.py index 95a6e9c3c8..e64e69c457 100644 --- a/litellm/proxy/hooks/presidio_pii_masking.py +++ b/litellm/proxy/hooks/presidio_pii_masking.py @@ -8,8 +8,8 @@ # Tell us how we can improve! 
- Krrish & Ishaan -from typing import Optional, Literal, Union -import litellm, traceback, sys, uuid, json +from typing import Optional, Union +import litellm, traceback, uuid, json # noqa: E401 from litellm.caching import DualCache from litellm.proxy._types import UserAPIKeyAuth from litellm.integrations.custom_logger import CustomLogger @@ -21,8 +21,8 @@ from litellm.utils import ( ImageResponse, StreamingChoices, ) -from datetime import datetime -import aiohttp, asyncio +import aiohttp +import asyncio class _OPTIONAL_PresidioPIIMasking(CustomLogger): @@ -138,7 +138,12 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger): else: raise Exception(f"Invalid anonymizer response: {redacted_text}") except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.hooks.presidio_pii_masking.py::async_pre_call_hook(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) raise e async def async_pre_call_hook( diff --git a/litellm/proxy/hooks/prompt_injection_detection.py b/litellm/proxy/hooks/prompt_injection_detection.py index 08dbedd8c8..ed33e3b519 100644 --- a/litellm/proxy/hooks/prompt_injection_detection.py +++ b/litellm/proxy/hooks/prompt_injection_detection.py @@ -204,7 +204,12 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger): return e.detail["error"] raise e except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) async def async_moderation_hook( self, diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 8cf2fa118d..4970ad4719 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -125,7 +125,10 @@ from litellm.router import ( AssistantsTypedDict, ) from litellm.router import ModelInfo as RouterModelInfo -from litellm._logging import verbose_router_logger, verbose_proxy_logger +from litellm._logging import ( + verbose_router_logger, + verbose_proxy_logger, +) from litellm.proxy.auth.handle_jwt import JWTHandler from litellm.proxy.auth.litellm_license import LicenseCheck from litellm.proxy.auth.model_checks import ( @@ -1471,7 +1474,12 @@ async def user_api_key_auth( else: raise Exception() except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.user_api_key_auth(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, litellm.BudgetExceededError): raise ProxyException( message=e.message, type="auth_error", param=None, code=400 @@ -3476,7 +3484,12 @@ async def generate_key_helper_fn( ) key_data["token_id"] = getattr(create_key_response, "token", None) except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.generate_key_helper_fn(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise e raise HTTPException( @@ -3515,7 +3528,12 @@ async def delete_verification_token(tokens: List, user_id: Optional[str] = None) else: raise Exception("DB not connected. 
prisma_client is None") except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.delete_verification_token(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) raise e return deleted_tokens @@ -3676,7 +3694,12 @@ async def async_assistants_data_generator( done_message = "[DONE]" yield f"data: {done_message}\n\n" except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.async_assistants_data_generator(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, @@ -3686,9 +3709,6 @@ async def async_assistants_data_generator( f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`" ) router_model_names = llm_router.model_names if llm_router is not None else [] - if user_debug: - traceback.print_exc() - if isinstance(e, HTTPException): raise e else: @@ -3728,7 +3748,12 @@ async def async_data_generator( done_message = "[DONE]" yield f"data: {done_message}\n\n" except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.async_data_generator(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, @@ -3738,8 +3763,6 @@ async def async_data_generator( f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`" ) router_model_names = llm_router.model_names if llm_router is not None else [] - if user_debug: - traceback.print_exc() if isinstance(e, HTTPException): raise e @@ -4386,7 +4409,12 @@ async def chat_completion( return _chat_response except Exception as e: data["litellm_status"] = "fail" # used for alerting - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.chat_completion(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) @@ -4397,8 +4425,6 @@ async def chat_completion( litellm_debug_info, ) router_model_names = llm_router.model_names if llm_router is not None else [] - if user_debug: - traceback.print_exc() if isinstance(e, HTTPException): raise ProxyException( @@ -4630,15 +4656,12 @@ async def completion( await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) - verbose_proxy_logger.debug("EXCEPTION RAISED IN PROXY MAIN.PY") - litellm_debug_info = getattr(e, "litellm_debug_info", "") - verbose_proxy_logger.debug( - "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. 
`litellm --model gpt-3.5-turbo --debug`", - e, - litellm_debug_info, + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.completion(): Exception occured - {}".format( + str(e) + ) ) - traceback.print_exc() - error_traceback = traceback.format_exc() + verbose_proxy_logger.debug(traceback.format_exc()) error_msg = f"{str(e)}" raise ProxyException( message=getattr(e, "message", error_msg), @@ -4848,7 +4871,12 @@ async def embeddings( e, litellm_debug_info, ) - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.embeddings(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "message", str(e)), @@ -5027,7 +5055,12 @@ async def image_generation( await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.image_generation(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "message", str(e)), @@ -5205,7 +5238,12 @@ async def audio_speech( ) except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.audio_speech(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) raise e @@ -5394,7 +5432,12 @@ async def audio_transcriptions( await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.audio_transcription(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "message", str(e.detail)), @@ -5403,7 +5446,6 @@ async def audio_transcriptions( code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), ) else: - error_traceback = traceback.format_exc() error_msg = f"{str(e)}" raise ProxyException( message=getattr(e, "message", error_msg), @@ -5531,7 +5573,12 @@ async def get_assistants( await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.get_assistants(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "message", str(e.detail)), @@ -5540,7 +5587,6 @@ async def get_assistants( code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), ) else: - error_traceback = traceback.format_exc() error_msg = f"{str(e)}" raise ProxyException( message=getattr(e, "message", error_msg), @@ -5660,7 +5706,12 @@ async def create_threads( await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.create_threads(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "message", str(e.detail)), @@ -5669,7 +5720,6 @@ async def create_threads( code=getattr(e, 
"status_code", status.HTTP_400_BAD_REQUEST), ) else: - error_traceback = traceback.format_exc() error_msg = f"{str(e)}" raise ProxyException( message=getattr(e, "message", error_msg), @@ -5788,7 +5838,12 @@ async def get_thread( await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.get_thread(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "message", str(e.detail)), @@ -5797,7 +5852,6 @@ async def get_thread( code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), ) else: - error_traceback = traceback.format_exc() error_msg = f"{str(e)}" raise ProxyException( message=getattr(e, "message", error_msg), @@ -5919,7 +5973,12 @@ async def add_messages( await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.add_messages(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "message", str(e.detail)), @@ -5928,7 +5987,6 @@ async def add_messages( code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), ) else: - error_traceback = traceback.format_exc() error_msg = f"{str(e)}" raise ProxyException( message=getattr(e, "message", error_msg), @@ -6046,7 +6104,12 @@ async def get_messages( await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.get_messages(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "message", str(e.detail)), @@ -6055,7 +6118,6 @@ async def get_messages( code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), ) else: - error_traceback = traceback.format_exc() error_msg = f"{str(e)}" raise ProxyException( message=getattr(e, "message", error_msg), @@ -6187,7 +6249,12 @@ async def run_thread( await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.run_thread(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "message", str(e.detail)), @@ -6196,7 +6263,6 @@ async def run_thread( code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), ) else: - error_traceback = traceback.format_exc() error_msg = f"{str(e)}" raise ProxyException( message=getattr(e, "message", error_msg), @@ -6335,7 +6401,12 @@ async def create_batch( await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.create_batch(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "message", str(e.detail)), @@ -6344,7 
+6415,6 @@ async def create_batch( code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), ) else: - error_traceback = traceback.format_exc() error_msg = f"{str(e)}" raise ProxyException( message=getattr(e, "message", error_msg), @@ -6478,7 +6548,12 @@ async def retrieve_batch( await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.retrieve_batch(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "message", str(e.detail)), @@ -6631,7 +6706,12 @@ async def create_file( await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.create_file(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "message", str(e.detail)), @@ -6640,7 +6720,6 @@ async def create_file( code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), ) else: - error_traceback = traceback.format_exc() error_msg = f"{str(e)}" raise ProxyException( message=getattr(e, "message", error_msg), @@ -6816,7 +6895,12 @@ async def moderations( await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.moderations(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "message", str(e)), @@ -6825,7 +6909,6 @@ async def moderations( code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), ) else: - error_traceback = traceback.format_exc() error_msg = f"{str(e)}" raise ProxyException( message=getattr(e, "message", error_msg), @@ -7136,7 +7219,12 @@ async def generate_key_fn( return GenerateKeyResponse(**response) except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.generate_key_fn(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "detail", f"Authentication Error({str(e)})"), @@ -9591,7 +9679,12 @@ async def user_info( } return response_data except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.user_info(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "detail", f"Authentication Error({str(e)})"), @@ -9686,7 +9779,12 @@ async def user_update(data: UpdateUserRequest): return response # update based on remaining passed in values except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.user_update(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "detail", f"Authentication Error({str(e)})"), @@ -9739,7 +9837,12 @@ async def user_request_model(request: Request): return 
{"status": "success"} # update based on remaining passed in values except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.user_request_model(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "detail", f"Authentication Error({str(e)})"), @@ -9781,7 +9884,12 @@ async def user_get_requests(): return {"requests": response} # update based on remaining passed in values except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.user_get_requests(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "detail", f"Authentication Error({str(e)})"), @@ -10171,7 +10279,12 @@ async def update_end_user( # update based on remaining passed in values except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.update_end_user(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "detail", f"Internal Server Error({str(e)})"), @@ -10255,7 +10368,12 @@ async def delete_end_user( # update based on remaining passed in values except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.delete_end_user(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "detail", f"Internal Server Error({str(e)})"), @@ -11558,7 +11676,12 @@ async def add_new_model( return model_response except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.add_new_model(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "detail", f"Authentication Error({str(e)})"), @@ -11672,7 +11795,12 @@ async def update_model( return model_response except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.update_model(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "detail", f"Authentication Error({str(e)})"), @@ -13906,7 +14034,12 @@ async def update_config(config_info: ConfigYAML): return {"message": "Config updated successfully"} except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.update_config(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "detail", f"Authentication Error({str(e)})"), @@ -14379,7 +14512,12 @@ async def get_config(): "available_callbacks": all_available_callbacks, } except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.get_config(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "detail", f"Authentication Error({str(e)})"), @@ -14630,7 
+14768,12 @@ async def health_services_endpoint( } except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.health_services_endpoint(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "detail", f"Authentication Error({str(e)})"), @@ -14709,7 +14852,12 @@ async def health_endpoint( "unhealthy_count": len(unhealthy_endpoints), } except Exception as e: - traceback.print_exc() + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.py::health_endpoint(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) raise e diff --git a/litellm/router.py b/litellm/router.py index e3fed496f1..1267a68a0e 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2096,8 +2096,8 @@ class Router: except Exception as e: raise e except Exception as e: - verbose_router_logger.debug(f"An exception occurred - {str(e)}") - traceback.print_exc() + verbose_router_logger.error(f"An exception occurred - {str(e)}") + verbose_router_logger.debug(traceback.format_exc()) raise original_exception async def async_function_with_retries(self, *args, **kwargs): diff --git a/litellm/router_strategy/lowest_cost.py b/litellm/router_strategy/lowest_cost.py index 1670490e16..46cbb2181e 100644 --- a/litellm/router_strategy/lowest_cost.py +++ b/litellm/router_strategy/lowest_cost.py @@ -1,11 +1,9 @@ #### What this does #### # picks based on response time (for streaming, this is time to first token) -from pydantic import BaseModel, Extra, Field, root_validator -import os, requests, random # type: ignore +from pydantic import BaseModel from typing import Optional, Union, List, Dict from datetime import datetime, timedelta -import random - +from litellm import verbose_logger import traceback from litellm.caching import DualCache from litellm.integrations.custom_logger import CustomLogger @@ -119,7 +117,12 @@ class LowestCostLoggingHandler(CustomLogger): if self.test_flag: self.logged_success += 1 except Exception as e: - traceback.print_exc() + verbose_logger.error( + "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format( + str(e) + ) + ) + verbose_logger.debug(traceback.format_exc()) pass async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): @@ -201,7 +204,12 @@ class LowestCostLoggingHandler(CustomLogger): if self.test_flag: self.logged_success += 1 except Exception as e: - traceback.print_exc() + verbose_logger.error( + "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format( + str(e) + ) + ) + verbose_logger.debug(traceback.format_exc()) pass async def async_get_available_deployments( diff --git a/litellm/router_strategy/lowest_latency.py b/litellm/router_strategy/lowest_latency.py index 1e4b151ada..5d71847510 100644 --- a/litellm/router_strategy/lowest_latency.py +++ b/litellm/router_strategy/lowest_latency.py @@ -1,16 +1,16 @@ #### What this does #### # picks based on response time (for streaming, this is time to first token) -from pydantic import BaseModel, Extra, Field, root_validator # type: ignore -import dotenv, os, requests, random # type: ignore +from pydantic import BaseModel +import random from typing import Optional, Union, List, Dict from datetime import datetime, timedelta -import random import traceback from litellm.caching import DualCache from 
 from litellm.integrations.custom_logger import CustomLogger
 from litellm import ModelResponse
 from litellm import token_counter
 import litellm
+from litellm import verbose_logger
 
 
 class LiteLLMBase(BaseModel):
@@ -165,7 +165,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
             if self.test_flag:
                 self.logged_success += 1
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.router_strategy.lowest_latency.py::log_success_event(): Exception occurred - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
             pass
 
     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
@@ -229,7 +234,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
                 # do nothing if it's not a timeout error
                 return
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.router_strategy.lowest_latency.py::async_log_failure_event(): Exception occurred - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
             pass
 
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@@ -352,7 +362,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
             if self.test_flag:
                 self.logged_success += 1
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.router_strategy.lowest_latency.py::async_log_success_event(): Exception occurred - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
             pass
 
     def get_available_deployments(
diff --git a/litellm/router_strategy/lowest_tpm_rpm.py b/litellm/router_strategy/lowest_tpm_rpm.py
index 15460051b8..a21c69abf7 100644
--- a/litellm/router_strategy/lowest_tpm_rpm.py
+++ b/litellm/router_strategy/lowest_tpm_rpm.py
@@ -11,6 +11,7 @@ from litellm.integrations.custom_logger import CustomLogger
 from litellm._logging import verbose_router_logger
 from litellm.utils import print_verbose
 
+
 class LiteLLMBase(BaseModel):
     """
     Implements default functions, all pydantic objects should have.
@@ -23,16 +24,20 @@ class LiteLLMBase(BaseModel):
             # if using pydantic v1
             return self.dict()
 
+
 class RoutingArgs(LiteLLMBase):
-    ttl: int = 1 * 60 # 1min (RPM/TPM expire key)
-
+    ttl: int = 1 * 60  # 1min (RPM/TPM expire key)
+
+
 class LowestTPMLoggingHandler(CustomLogger):
     test_flag: bool = False
     logged_success: int = 0
     logged_failure: int = 0
     default_cache_time_seconds: int = 1 * 60 * 60  # 1 hour
 
-    def __init__(self, router_cache: DualCache, model_list: list, routing_args: dict = {}):
+    def __init__(
+        self, router_cache: DualCache, model_list: list, routing_args: dict = {}
+    ):
         self.router_cache = router_cache
         self.model_list = model_list
         self.routing_args = RoutingArgs(**routing_args)
@@ -72,19 +77,28 @@ class LowestTPMLoggingHandler(CustomLogger):
             request_count_dict = self.router_cache.get_cache(key=tpm_key) or {}
             request_count_dict[id] = request_count_dict.get(id, 0) + total_tokens
 
-            self.router_cache.set_cache(key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl)
+            self.router_cache.set_cache(
+                key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl
+            )
 
             ## RPM
             request_count_dict = self.router_cache.get_cache(key=rpm_key) or {}
             request_count_dict[id] = request_count_dict.get(id, 0) + 1
 
-            self.router_cache.set_cache(key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl)
+            self.router_cache.set_cache(
+                key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl
+            )
 
             ### TESTING ###
             if self.test_flag:
                 self.logged_success += 1
         except Exception as e:
-            traceback.print_exc()
+            verbose_router_logger.error(
+                "litellm.router_strategy.lowest_tpm_rpm.py::log_success_event(): Exception occurred - {}".format(
+                    str(e)
+                )
+            )
+            verbose_router_logger.debug(traceback.format_exc())
             pass
 
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@@ -123,19 +137,28 @@ class LowestTPMLoggingHandler(CustomLogger):
             request_count_dict = self.router_cache.get_cache(key=tpm_key) or {}
             request_count_dict[id] = request_count_dict.get(id, 0) + total_tokens
 
-            self.router_cache.set_cache(key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl)
+            self.router_cache.set_cache(
+                key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl
+            )
 
             ## RPM
             request_count_dict = self.router_cache.get_cache(key=rpm_key) or {}
             request_count_dict[id] = request_count_dict.get(id, 0) + 1
 
-            self.router_cache.set_cache(key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl)
+            self.router_cache.set_cache(
+                key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl
+            )
 
             ### TESTING ###
             if self.test_flag:
                 self.logged_success += 1
         except Exception as e:
-            traceback.print_exc()
+            verbose_router_logger.error(
+                "litellm.router_strategy.lowest_tpm_rpm.py::async_log_success_event(): Exception occurred - {}".format(
+                    str(e)
+                )
+            )
+            verbose_router_logger.debug(traceback.format_exc())
             pass
 
     def get_available_deployments(
diff --git a/litellm/router_strategy/lowest_tpm_rpm_v2.py b/litellm/router_strategy/lowest_tpm_rpm_v2.py
index 40e75031ad..e3b8c8b770 100644
--- a/litellm/router_strategy/lowest_tpm_rpm_v2.py
+++ b/litellm/router_strategy/lowest_tpm_rpm_v2.py
@@ -1,19 +1,19 @@
 #### What this does ####
 # identifies lowest tpm deployment
 from pydantic import BaseModel
-import dotenv, os, requests, random
+import random
 from typing import Optional, Union, List, Dict
-import datetime as datetime_og
-from datetime import datetime
-import traceback, asyncio, httpx
+import traceback
+import httpx
 import litellm
 from litellm import token_counter
 from litellm.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
-from litellm._logging import verbose_router_logger
+from litellm._logging import verbose_router_logger, verbose_logger
 from litellm.utils import print_verbose, get_utc_datetime
 from litellm.types.router import RouterErrors
 
+
 class LiteLLMBase(BaseModel):
     """
     Implements default functions, all pydantic objects should have.
@@ -22,12 +22,14 @@ class LiteLLMBase(BaseModel):
     def json(self, **kwargs):
         try:
             return self.model_dump()  # noqa
-        except:
+        except Exception as e:
             # if using pydantic v1
             return self.dict()
 
+
 class RoutingArgs(LiteLLMBase):
-    ttl: int = 1 * 60 # 1min (RPM/TPM expire key)
+    ttl: int = 1 * 60  # 1min (RPM/TPM expire key)
+
 
 class LowestTPMLoggingHandler_v2(CustomLogger):
     """
@@ -47,7 +49,9 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
     logged_failure: int = 0
     default_cache_time_seconds: int = 1 * 60 * 60  # 1 hour
 
-    def __init__(self, router_cache: DualCache, model_list: list, routing_args: dict = {}):
+    def __init__(
+        self, router_cache: DualCache, model_list: list, routing_args: dict = {}
+    ):
         self.router_cache = router_cache
         self.model_list = model_list
         self.routing_args = RoutingArgs(**routing_args)
@@ -104,7 +108,9 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
                 )
             else:  # if local result below limit, check redis
                 ## prevent unnecessary redis checks
-                result = self.router_cache.increment_cache(key=rpm_key, value=1, ttl=self.routing_args.ttl)
+                result = self.router_cache.increment_cache(
+                    key=rpm_key, value=1, ttl=self.routing_args.ttl
+                )
                 if result is not None and result > deployment_rpm:
                     raise litellm.RateLimitError(
                         message="Deployment over defined rpm limit={}. current usage={}".format(
@@ -244,12 +250,19 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
             # update cache
             ## TPM
-            self.router_cache.increment_cache(key=tpm_key, value=total_tokens, ttl=self.routing_args.ttl)
+            self.router_cache.increment_cache(
+                key=tpm_key, value=total_tokens, ttl=self.routing_args.ttl
+            )
 
             ### TESTING ###
             if self.test_flag:
                 self.logged_success += 1
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.router_strategy.lowest_tpm_rpm_v2.py::log_success_event(): Exception occurred - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
             pass
 
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@@ -295,7 +308,12 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
             if self.test_flag:
                 self.logged_success += 1
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.router_strategy.lowest_tpm_rpm_v2.py::async_log_success_event(): Exception occurred - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
             pass
 
     def _common_checks_available_deployment(
diff --git a/litellm/utils.py b/litellm/utils.py
index 1788600941..fff741d469 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1372,8 +1372,12 @@ class Logging:
                             callback_func=callback,
                         )
                 except Exception as e:
-                    traceback.print_exc()
-                    print_verbose(
+                    verbose_logger.error(
+                        "litellm.Logging.pre_call(): Exception occurred - {}".format(
+                            str(e)
+                        )
+                    )
+                    verbose_logger.debug(
                         f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}"
                     )
                     print_verbose(
@@ -10526,7 +10530,12 @@ class CustomStreamWrapper:
                 "finish_reason": finish_reason,
             }
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
"litellm.CustomStreamWrapper.handle_predibase_chunk(): Exception occured - {}".format( + str(e) + ) + ) + verbose_logger.debug(traceback.format_exc()) raise e def handle_huggingface_chunk(self, chunk): @@ -10570,7 +10579,12 @@ class CustomStreamWrapper: "finish_reason": finish_reason, } except Exception as e: - traceback.print_exc() + verbose_logger.error( + "litellm.CustomStreamWrapper.handle_huggingface_chunk(): Exception occured - {}".format( + str(e) + ) + ) + verbose_logger.debug(traceback.format_exc()) raise e def handle_ai21_chunk(self, chunk): # fake streaming @@ -10805,7 +10819,12 @@ class CustomStreamWrapper: "usage": usage, } except Exception as e: - traceback.print_exc() + verbose_logger.error( + "litellm.CustomStreamWrapper.handle_openai_chat_completion_chunk(): Exception occured - {}".format( + str(e) + ) + ) + verbose_logger.debug(traceback.format_exc()) raise e def handle_azure_text_completion_chunk(self, chunk): @@ -10886,7 +10905,12 @@ class CustomStreamWrapper: else: return "" except: - traceback.print_exc() + verbose_logger.error( + "litellm.CustomStreamWrapper.handle_baseten_chunk(): Exception occured - {}".format( + str(e) + ) + ) + verbose_logger.debug(traceback.format_exc()) return "" def handle_cloudlfare_stream(self, chunk): @@ -11085,7 +11109,12 @@ class CustomStreamWrapper: "is_finished": True, } except: - traceback.print_exc() + verbose_logger.error( + "litellm.CustomStreamWrapper.handle_clarifai_chunk(): Exception occured - {}".format( + str(e) + ) + ) + verbose_logger.debug(traceback.format_exc()) return "" def model_response_creator(self): @@ -11557,7 +11586,12 @@ class CustomStreamWrapper: tool["type"] = "function" model_response.choices[0].delta = Delta(**_json_delta) except Exception as e: - traceback.print_exc() + verbose_logger.error( + "litellm.CustomStreamWrapper.chunk_creator(): Exception occured - {}".format( + str(e) + ) + ) + verbose_logger.debug(traceback.format_exc()) model_response.choices[0].delta = Delta() else: try: