diff --git a/.flake8 b/.flake8
new file mode 100644
index 000000000..b51cc0045
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,2 @@
+[flake8]
+ignore = E,F,W,B,B9,C,D,I,N,S,W503,W504,E203, TCE,TCA,EXE999,E999,TD
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 000000000..8bda916bc
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,8 @@
+repos:
+- repo: https://github.com/pycqa/flake8
+  rev: 3.8.4 # The version of flake8 to use
+  hooks:
+    - id: flake8
+      exclude: ^litellm/tests/|^litellm/proxy/|^litellm/integrations/
+      additional_dependencies: [flake8-print]
+      files: litellm/.*\.py
\ No newline at end of file
diff --git a/litellm/budget_manager.py b/litellm/budget_manager.py
index 77a1e51f2..6a9d1e520 100644
--- a/litellm/budget_manager.py
+++ b/litellm/budget_manager.py
@@ -14,7 +14,8 @@ class BudgetManager:
 
     def print_verbose(self, print_statement):
         if litellm.set_verbose:
-            print(print_statement)
+            import logging
+            logging.info(print_statement)
 
     def load_data(self):
         if self.client_type == "local":
@@ -149,8 +150,6 @@ class BudgetManager:
             'project_name' : self.project_name,
             "user_dict": self.user_dict
         }
-        print(f"data: {data}")
         response = requests.post(url, headers=headers, json=data)
-        print(f"response: {response.text}")
         response = response.json()
         return response
\ No newline at end of file
diff --git a/litellm/caching.py b/litellm/caching.py
index 667eff622..9632a6b03 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -8,7 +8,7 @@
 # Thank you users! We ❤️ you! - Krrish & Ishaan
 
 import litellm
-import time
+import time, logging
 import json, traceback
 
 
@@ -37,7 +36,6 @@ class RedisCache(BaseCache):
     def __init__(self, host, port, password):
         import redis
         # if users don't provider one, use the default litellm cache
-        print(f"HOST: {host}; PORT: {port}; PASSWORD: {password}")
         self.redis_client = redis.Redis(host=host, port=port, password=password)
 
     def set_cache(self, key, value, **kwargs):
@@ -46,7 +45,7 @@ class RedisCache(BaseCache):
            self.redis_client.set(name=key, value=str(value), ex=ttl)
        except Exception as e:
            # NON blocking - notify users Redis is throwing an exception
-            print("LiteLLM Caching: set() - Got exception from REDIS : ", e)
+            logging.debug(f"LiteLLM Caching: set() - Got exception from REDIS: {e}")
 
     def get_cache(self, key, **kwargs):
         try:
@@ -61,13 +60,13 @@
        except Exception as e:
            # NON blocking - notify users Redis is throwing an exception
            traceback.print_exc()
-            print("LiteLLM Caching: get() - Got exception from REDIS: ", e)
+            logging.debug(f"LiteLLM Caching: get() - Got exception from REDIS: {e}")
 
 
 class HostedCache(BaseCache):
     def set_cache(self, key, value, **kwargs):
         if "ttl" in kwargs:
-            print("LiteLLM Caching: TTL is not supported for hosted cache!")
+            logging.debug("LiteLLM Caching: TTL is not supported for hosted cache!")
         # make a post request to api.litellm.ai/set_cache
         import requests
         url = f"https://api.litellm.ai/set_cache?key={key}&value={str(value)}"
@@ -200,12 +199,10 @@ class Cache:
            cached_result = self.cache.get_cache(cache_key)
            if cached_result != None and 'stream' in kwargs and kwargs['stream'] == True:
                # if streaming is true and we got a cache hit, return a generator
-                # print("cache hit and stream=True")
-                # print(cached_result)
                return self.generate_streaming_content(cached_result["choices"][0]['message']['content'])
            return cached_result
        except Exception as e:
-            print(f"An exception occurred: {traceback.format_exc()}")
+            logging.debug(f"An exception occurred: {traceback.format_exc()}")
            return None
 
    def add_cache(self, result, *args, **kwargs):
@@ -224,10 +221,7 @@ class Cache:
                cache_key = kwargs["cache_key"]
            else:
                cache_key = self.get_cache_key(*args, **kwargs)
-            # print("adding to cache", cache_key, result)
-            # print(cache_key)
            if cache_key is not None:
-                # print("adding to cache", cache_key, result)
                self.cache.set_cache(cache_key, result, **kwargs)
        except:
            pass
diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py
index 20a1c1fde..e6f48a5bd 100644
--- a/litellm/integrations/langfuse.py
+++ b/litellm/integrations/langfuse.py
@@ -36,10 +36,6 @@ class LangFuseLogger:
            print_verbose(
                f"Langfuse Logging - Enters logging function for model {kwargs}"
            )
-            # print(response_obj)
-            # print(response_obj['choices'][0]['message']['content'])
-            # print(response_obj['usage']['prompt_tokens'])
-            # print(response_obj['usage']['completion_tokens'])
 
            metadata = kwargs.get("metadata", {})
            prompt = [kwargs['messages']]
diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py
index e1835d8c0..20e96686f 100644
--- a/litellm/llms/bedrock.py
+++ b/litellm/llms/bedrock.py
@@ -397,7 +397,6 @@ def completion(
        outputText = response_body.get('results')[0].get('outputText')
 
        response_metadata = response.get("ResponseMetadata", {})
-        print(f"response_metadata: {response_metadata}")
        if response_metadata.get("HTTPStatusCode", 500) >= 400:
            raise BedrockError(
                message=outputText,
diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py
index 3a0530803..add9c8d7f 100644
--- a/litellm/llms/ollama.py
+++ b/litellm/llms/ollama.py
@@ -147,7 +147,6 @@ def get_ollama_response_stream(
                    yield completion_obj
            except Exception as e:
                traceback.print_exc()
-                print(f"Error decoding JSON: {e}")
    session.close()
 
 if async_generator_imported:
@@ -198,5 +197,6 @@
                        completion_obj["content"] = j["response"]
                    await yield_({"choices": [{"delta": completion_obj}]})
                except Exception as e:
-                    print(f"Error decoding JSON: {e}")
+                    import logging
+                    logging.debug(f"Error decoding JSON: {e}")
        session.close()
\ No newline at end of file
diff --git a/litellm/main.py b/litellm/main.py
index 143aa29ea..b4bb6d9a2 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1624,13 +1624,9 @@ def batch_completion_models_all_responses(*args, **kwargs):
 
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(models)) as executor:
        for idx, model in enumerate(models):
-            print(f"{GREEN}LiteLLM: Making request to model: {model}{RESET}")
            future = executor.submit(completion, *args, model=model, **kwargs)
            if future.result() is not None:
                responses.append(future.result())
-                print(f"{GREEN}LiteLLM: Model {model} returned response{RESET}")
-            else:
-                print(f"{RED}LiteLLM: Model {model } did not return a response{RESET}")
 
    return responses
 
@@ -1863,6 +1859,7 @@ def embedding(
 
 ###### Text Completion ################
 def text_completion(*args, **kwargs):
+    global print_verbose
     import copy
     """
     This maps to the Openai.Completion.create format, which has a different I/O (accepts prompt, returning ["choices"]["text"].
@@ -1930,7 +1927,7 @@ def text_completion(*args, **kwargs):
        raw_response = response._hidden_params.get("original_response", None)
        transformed_logprobs = litellm.utils.transform_logprobs(raw_response)
    except Exception as e:
-        print("LiteLLM non blocking exception", e)
+        print_verbose(f"LiteLLM non blocking exception: {e}")
    text_completion_response["id"] = response["id"]
    text_completion_response["object"] = "text_completion"
    text_completion_response["created"] = response["created"]
@@ -1964,7 +1961,8 @@ def moderation(input: str, api_key: Optional[str]=None):
 ## Set verbose to true -> ```litellm.set_verbose = True```
 def print_verbose(print_statement):
     if litellm.set_verbose:
-        print(f"LiteLLM: {print_statement}")
+        import logging
+        logging.info(f"LiteLLM: {print_statement}")
 
 def config_completion(**kwargs):
     if litellm.config_path != None:
diff --git a/litellm/utils.py b/litellm/utils.py
index 7e4587700..935f5e36c 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -285,7 +285,8 @@ class TextCompletionResponse(OpenAIObject):
 ############################################################
 def print_verbose(print_statement):
     if litellm.set_verbose:
-        print(f"LiteLLM: {print_statement}")
+        import logging
+        logging.info(f"LiteLLM: {print_statement}")
 
 ####### LOGGING ###################
 from enum import Enum
@@ -538,8 +539,6 @@ class Logging:
                        print_verbose("reaches api manager for updating model cost")
                        litellm.apiManager.update_cost(completion_obj=result, user=self.user)
                    if callback == "cache":
-                        # print("entering logger first time")
-                        # print(self.litellm_params["stream_response"])
                        if litellm.cache != None and self.model_call_details.get('optional_params', {}).get('stream', False) == True:
                            litellm_call_id = self.litellm_params["litellm_call_id"]
                            if litellm_call_id in self.litellm_params["stream_response"]:
@@ -550,10 +549,7 @@ class Logging:
                                self.litellm_params["stream_response"][litellm_call_id]["choices"][0]["message"]["content"] += result["content"]
                            else: # init a streaming response for this call id
                                new_model_response = ModelResponse(choices=[Choices(message=Message(content="default"))])
-                                #print("creating new model response")
-                                #print(new_model_response)
                                self.litellm_params["stream_response"][litellm_call_id] = new_model_response
-                            #print("adding to cache for", litellm_call_id)
                            litellm.cache.add_cache(self.litellm_params["stream_response"][litellm_call_id], **self.model_call_details)
                    if callback == "promptlayer":
                        print_verbose("reaches promptlayer for logging!")
@@ -576,7 +572,6 @@ class Logging:
 
                    print_verbose("reaches supabase for streaming logging!")
                    result = kwargs["complete_streaming_response"]
-                    # print(kwargs)
                    model = kwargs["model"]
                    messages = kwargs["messages"]
                    optional_params = kwargs.get("optional_params", {})
@@ -732,11 +727,11 @@ def exception_logging(
                    model_call_details
                ) # Expectation: any logger function passed in by the user should accept a dict object
        except Exception as e:
-            print(
+            print_verbose(
                f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
            )
    except Exception as e:
-        print(
+        print_verbose(
            f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
        )
        pass
@@ -799,7 +794,6 @@ def client(original_function):
            return logging_obj
        except Exception as e: # DO NOT BLOCK running the function because of this
            print_verbose(f"[Non-Blocking] {traceback.format_exc()}; args - {args}; kwargs - {kwargs}")
-            print(e)
            pass
 
    def crash_reporting(*args, **kwargs):
@@ -1776,9 +1770,9 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None, api_
            custom_llm_provider = "bedrock"
 
        if custom_llm_provider is None or custom_llm_provider=="":
-            print()
-            print("\033[1;31mProvider List: https://docs.litellm.ai/docs/providers\033[0m")
-            print()
+            print() # noqa
+            print("\033[1;31mProvider List: https://docs.litellm.ai/docs/providers\033[0m") # noqa
+            print() # noqa
            raise ValueError(f"LLM Provider NOT provided. Pass in the LLM provider you are trying to call. E.g. For 'Huggingface' inference endpoints pass in `completion(model='huggingface/{model}',..)` Learn more: https://docs.litellm.ai/docs/providers")
        return model, custom_llm_provider, dynamic_api_key, api_base
    except Exception as e:
@@ -2772,7 +2766,7 @@ def get_all_keys(llm_provider=None):
 
 
 def get_model_list():
-    global last_fetched_at
+    global last_fetched_at, print_verbose
    try:
        # if user is using hosted product -> get their updated model list
        user_email = (
@@ -2784,7 +2778,7 @@ def get_model_list():
        if user_email:
            # make the api call
            last_fetched_at = time.time()
-            print(f"last_fetched_at: {last_fetched_at}")
+            print_verbose(f"last_fetched_at: {last_fetched_at}")
            response = requests.post(
                url="http://api.litellm.ai/get_model_list",
                headers={"content-type": "application/json"},
@@ -2820,10 +2814,10 @@ def exception_type(
    global user_logger_fn, liteDebuggerClient
    exception_mapping_worked = False
    if litellm.suppress_debug_info is False:
-        print()
-        print("\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m")
-        print("LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.")
-        print()
+        print() # noqa
+        print("\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m") # noqa
+        print("LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.") # noqa
+        print() # noqa
    try:
        if isinstance(original_exception, OriginalError):
            # Handle the OpenAIError
@@ -3401,7 +3395,7 @@ def exception_type(
                        model=model
                    )
            elif hasattr(original_exception, "status_code"):
-                print(f"status code: {original_exception.status_code}")
+                print_verbose(f"status code: {original_exception.status_code}")
                if original_exception.status_code == 401:
                    exception_mapping_worked = True
                    raise AuthenticationError(
@@ -4267,12 +4261,11 @@ def completion_with_fallbacks(**kwargs):
                return response
            except Exception as e:
-                print(e)
+                print_verbose(e)
                rate_limited_models.add(model)
                model_expiration_times[model] = (
                    time.time() + 60
                ) # cool down this selected model
-                # print(f"rate_limited_models {rate_limited_models}")
                pass
 
    return response
@@ -4417,7 +4410,7 @@ def trim_messages(
        return final_messages
    except Exception as e: # [NON-Blocking, if error occurs just return final_messages
-        print("Got exception while token trimming", e)
+        print_verbose(f"Got exception while token trimming: {e}")
        return messages
 
 
 def get_valid_models():
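
The edits above all follow one pattern: unconditional print() calls are removed or routed through a set_verbose-guarded helper that emits via the standard logging module, the few prints kept on purpose are marked # noqa, and the new .flake8 ignore list silences the other rule families so that, in effect, the flake8-print pre-commit hook only flags stray prints in the covered litellm/*.py files (for example via `pre-commit run --all-files`; the exact rule codes depend on the flake8-print version). The snippet below is a minimal, self-contained sketch of that runtime pattern, not the litellm code itself: it mirrors the print_verbose helpers added in litellm/main.py and litellm/utils.py, substitutes a local set_verbose flag for the real litellm attribute, and illustrates the operational caveat that INFO-level records stay invisible until the host application configures logging.

import logging

set_verbose = False  # stand-in for litellm.set_verbose (assumption for this sketch)

def print_verbose(print_statement):
    # Same guard as the helpers in this diff: emit only when verbosity is enabled,
    # and hand the message to logging instead of writing to stdout.
    if set_verbose:
        logging.info(f"LiteLLM: {print_statement}")

# The root logger defaults to WARNING, so the INFO records above are dropped
# unless the application opts in, e.g.:
logging.basicConfig(level=logging.INFO)
set_verbose = True
print_verbose("this message now reaches the configured log handler, not stdout")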