forked from phoenix/litellm-mirror
refactor(all-files): removing all print statements; adding pre-commit + flake8 to prevent future regressions
This commit is contained in: parent 38ff9f2b6f, commit 6b40546e59
9 changed files with 39 additions and 50 deletions

.flake8 (new file, +2)
@@ -0,0 +1,2 @@
+[flake8]
+ignore = E,F,W,B,B9,C,D,I,N,S,W503,W504,E203, TCE,TCA,EXE999,E999,TD

.pre-commit-config.yaml (new file, +8)
@@ -0,0 +1,8 @@
+repos:
+-   repo: https://github.com/pycqa/flake8
+    rev: 3.8.4  # The version of flake8 to use
+    hooks:
+    -   id: flake8
+        exclude: ^litellm/tests/|^litellm/proxy/|^litellm/integrations/
+        additional_dependencies: [flake8-print]
+        files: litellm/.*\.py
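
Taken together, the two new config files wire flake8 (with the flake8-print plugin) into pre-commit so stray print() calls under litellm/ are flagged before they are committed; tests, proxy, and integrations are excluded. A minimal sketch of running the same check by hand, assuming flake8 and flake8-print are installed and the repo's .flake8 sits in the working directory (the helper name and invocation are illustrative, not part of the commit):

# Illustrative helper, not part of the commit: run flake8 over the litellm
# package roughly the way the pre-commit hook does.
import subprocess
import sys

def run_flake8(path="litellm/"):
    # flake8 picks up the repo-level .flake8 config automatically; flake8-print
    # supplies the print-statement checks this commit relies on.
    result = subprocess.run(
        [sys.executable, "-m", "flake8", path],
        capture_output=True,
        text=True,
    )
    return result.returncode, result.stdout

if __name__ == "__main__":
    code, findings = run_flake8()
    if code != 0:
        sys.stderr.write(findings)  # one finding per line: file:line:col code message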

@@ -14,7 +14,8 @@ class BudgetManager:
 
     def print_verbose(self, print_statement):
         if litellm.set_verbose:
-            print(print_statement)
+            import logging
+            logging.info(print_statement)
 
     def load_data(self):
         if self.client_type == "local":
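
The hunk above shows the pattern this commit repeats across files: verbose output is routed through the logging module instead of a bare print(), still gated on litellm.set_verbose. A standalone sketch of the idea, with set_verbose standing in for the litellm flag and the message text made up for the example:

# Sketch of the verbose-logging pattern applied in this commit.
import logging

set_verbose = True  # stand-in for litellm.set_verbose

def print_verbose(print_statement):
    if set_verbose:
        logging.info(print_statement)

logging.basicConfig(level=logging.INFO)
print_verbose("budget check complete")  # emitted only when set_verbose is True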

@@ -149,8 +150,6 @@ class BudgetManager:
             'project_name' : self.project_name,
             "user_dict": self.user_dict
         }
-        print(f"data: {data}")
         response = requests.post(url, headers=headers, json=data)
-        print(f"response: {response.text}")
         response = response.json()
         return response

@@ -8,7 +8,7 @@
 # Thank you users! We ❤️ you! - Krrish & Ishaan
 
 import litellm
-import time
+import time, logging
 import json, traceback
 
 

@@ -37,7 +37,6 @@ class RedisCache(BaseCache):
     def __init__(self, host, port, password):
         import redis
         # if users don't provider one, use the default litellm cache
-        print(f"HOST: {host}; PORT: {port}; PASSWORD: {password}")
         self.redis_client = redis.Redis(host=host, port=port, password=password)
 
     def set_cache(self, key, value, **kwargs):

@@ -46,7 +45,7 @@ class RedisCache(BaseCache):
             self.redis_client.set(name=key, value=str(value), ex=ttl)
         except Exception as e:
             # NON blocking - notify users Redis is throwing an exception
-            print("LiteLLM Caching: set() - Got exception from REDIS : ", e)
+            logging.debug("LiteLLM Caching: set() - Got exception from REDIS : ", e)
 
     def get_cache(self, key, **kwargs):
         try:

@@ -61,13 +60,13 @@ class RedisCache(BaseCache):
         except Exception as e:
             # NON blocking - notify users Redis is throwing an exception
             traceback.print_exc()
-            print("LiteLLM Caching: get() - Got exception from REDIS: ", e)
+            logging.debug("LiteLLM Caching: get() - Got exception from REDIS: ", e)
 
 
 class HostedCache(BaseCache):
     def set_cache(self, key, value, **kwargs):
         if "ttl" in kwargs:
-            print("LiteLLM Caching: TTL is not supported for hosted cache!")
+            logging.debug("LiteLLM Caching: TTL is not supported for hosted cache!")
         # make a post request to api.litellm.ai/set_cache
         import requests
         url = f"https://api.litellm.ai/set_cache?key={key}&value={str(value)}"

@@ -200,12 +199,10 @@ class Cache:
             cached_result = self.cache.get_cache(cache_key)
             if cached_result != None and 'stream' in kwargs and kwargs['stream'] == True:
                 # if streaming is true and we got a cache hit, return a generator
-                # print("cache hit and stream=True")
-                # print(cached_result)
                 return self.generate_streaming_content(cached_result["choices"][0]['message']['content'])
             return cached_result
         except Exception as e:
-            print(f"An exception occurred: {traceback.format_exc()}")
+            logging.debug(f"An exception occurred: {traceback.format_exc()}")
             return None
 
     def add_cache(self, result, *args, **kwargs):

@@ -224,10 +221,7 @@ class Cache:
                 cache_key = kwargs["cache_key"]
             else:
                 cache_key = self.get_cache_key(*args, **kwargs)
-            # print("adding to cache", cache_key, result)
-            # print(cache_key)
             if cache_key is not None:
-                # print("adding to cache", cache_key, result)
                 self.cache.set_cache(cache_key, result, **kwargs)
         except:
             pass

@@ -36,10 +36,6 @@ class LangFuseLogger:
             print_verbose(
                 f"Langfuse Logging - Enters logging function for model {kwargs}"
             )
-            # print(response_obj)
-            # print(response_obj['choices'][0]['message']['content'])
-            # print(response_obj['usage']['prompt_tokens'])
-            # print(response_obj['usage']['completion_tokens'])
             metadata = kwargs.get("metadata", {})
             prompt = [kwargs['messages']]
 

@@ -397,7 +397,6 @@ def completion(
             outputText = response_body.get('results')[0].get('outputText')
 
         response_metadata = response.get("ResponseMetadata", {})
-        print(f"response_metadata: {response_metadata}")
         if response_metadata.get("HTTPStatusCode", 500) >= 400:
             raise BedrockError(
                 message=outputText,

@@ -147,7 +147,6 @@ def get_ollama_response_stream(
                     yield completion_obj
                 except Exception as e:
                     traceback.print_exc()
-                    print(f"Error decoding JSON: {e}")
     session.close()
 
 if async_generator_imported:

@@ -198,5 +197,6 @@ if async_generator_imported:
                             completion_obj["content"] = j["response"]
                             await yield_({"choices": [{"delta": completion_obj}]})
                         except Exception as e:
-                            print(f"Error decoding JSON: {e}")
+                            import logging
+                            logging.debug(f"Error decoding JSON: {e}")
         session.close()

@@ -1624,13 +1624,9 @@ def batch_completion_models_all_responses(*args, **kwargs):
 
     with concurrent.futures.ThreadPoolExecutor(max_workers=len(models)) as executor:
         for idx, model in enumerate(models):
-            print(f"{GREEN}LiteLLM: Making request to model: {model}{RESET}")
             future = executor.submit(completion, *args, model=model, **kwargs)
             if future.result() is not None:
                 responses.append(future.result())
-                print(f"{GREEN}LiteLLM: Model {model} returned response{RESET}")
-            else:
-                print(f"{RED}LiteLLM: Model {model} did not return a response{RESET}")
 
     return responses
 

@@ -1863,6 +1859,7 @@ def embedding(
 
 ###### Text Completion ################
 def text_completion(*args, **kwargs):
+    global print_verbose
     import copy
     """
     This maps to the Openai.Completion.create format, which has a different I/O (accepts prompt, returning ["choices"]["text"].

@@ -1930,7 +1927,7 @@ def text_completion(*args, **kwargs):
             raw_response = response._hidden_params.get("original_response", None)
             transformed_logprobs = litellm.utils.transform_logprobs(raw_response)
         except Exception as e:
-            print("LiteLLM non blocking exception", e)
+            print_verbose("LiteLLM non blocking exception", e)
     text_completion_response["id"] = response["id"]
     text_completion_response["object"] = "text_completion"
     text_completion_response["created"] = response["created"]

@@ -1964,7 +1961,8 @@ def moderation(input: str, api_key: Optional[str]=None):
 ## Set verbose to true -> ```litellm.set_verbose = True```
 def print_verbose(print_statement):
     if litellm.set_verbose:
-        print(f"LiteLLM: {print_statement}")
+        import logging
+        logging.info(f"LiteLLM: {print_statement}")
 
 def config_completion(**kwargs):
     if litellm.config_path != None:

@@ -285,7 +285,8 @@ class TextCompletionResponse(OpenAIObject):
 ############################################################
 def print_verbose(print_statement):
     if litellm.set_verbose:
-        print(f"LiteLLM: {print_statement}")
+        import logging
+        logging.info(f"LiteLLM: {print_statement}")
 
 ####### LOGGING ###################
 from enum import Enum

@@ -538,8 +539,6 @@ class Logging:
                         print_verbose("reaches api manager for updating model cost")
                         litellm.apiManager.update_cost(completion_obj=result, user=self.user)
                     if callback == "cache":
-                        # print("entering logger first time")
-                        # print(self.litellm_params["stream_response"])
                         if litellm.cache != None and self.model_call_details.get('optional_params', {}).get('stream', False) == True:
                             litellm_call_id = self.litellm_params["litellm_call_id"]
                             if litellm_call_id in self.litellm_params["stream_response"]:

@@ -550,10 +549,7 @@ class Logging:
                                 self.litellm_params["stream_response"][litellm_call_id]["choices"][0]["message"]["content"] += result["content"]
                             else: # init a streaming response for this call id
                                 new_model_response = ModelResponse(choices=[Choices(message=Message(content="default"))])
-                                #print("creating new model response")
-                                #print(new_model_response)
                                 self.litellm_params["stream_response"][litellm_call_id] = new_model_response
-                                #print("adding to cache for", litellm_call_id)
                             litellm.cache.add_cache(self.litellm_params["stream_response"][litellm_call_id], **self.model_call_details)
                     if callback == "promptlayer":
                         print_verbose("reaches promptlayer for logging!")

@@ -576,7 +572,6 @@ class Logging:
                         print_verbose("reaches supabase for streaming logging!")
                         result = kwargs["complete_streaming_response"]
 
-                    # print(kwargs)
                     model = kwargs["model"]
                     messages = kwargs["messages"]
                     optional_params = kwargs.get("optional_params", {})

@@ -732,11 +727,11 @@ def exception_logging(
                 model_call_details
             ) # Expectation: any logger function passed in by the user should accept a dict object
         except Exception as e:
-            print(
+            print_verbose(
                 f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
             )
     except Exception as e:
-        print(
+        print_verbose(
             f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
         )
         pass

@@ -799,7 +794,6 @@ def client(original_function):
             return logging_obj
         except Exception as e: # DO NOT BLOCK running the function because of this
             print_verbose(f"[Non-Blocking] {traceback.format_exc()}; args - {args}; kwargs - {kwargs}")
-            print(e)
             pass
 
     def crash_reporting(*args, **kwargs):
|
||||||
custom_llm_provider = "bedrock"
|
custom_llm_provider = "bedrock"
|
||||||
|
|
||||||
if custom_llm_provider is None or custom_llm_provider=="":
|
if custom_llm_provider is None or custom_llm_provider=="":
|
||||||
print()
|
print() # noqa
|
||||||
print("\033[1;31mProvider List: https://docs.litellm.ai/docs/providers\033[0m")
|
print("\033[1;31mProvider List: https://docs.litellm.ai/docs/providers\033[0m") # noqa
|
||||||
print()
|
print() # noqa
|
||||||
raise ValueError(f"LLM Provider NOT provided. Pass in the LLM provider you are trying to call. E.g. For 'Huggingface' inference endpoints pass in `completion(model='huggingface/{model}',..)` Learn more: https://docs.litellm.ai/docs/providers")
|
raise ValueError(f"LLM Provider NOT provided. Pass in the LLM provider you are trying to call. E.g. For 'Huggingface' inference endpoints pass in `completion(model='huggingface/{model}',..)` Learn more: https://docs.litellm.ai/docs/providers")
|
||||||
return model, custom_llm_provider, dynamic_api_key, api_base
|
return model, custom_llm_provider, dynamic_api_key, api_base
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
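
The # noqa markers added in this hunk are flake8's standard escape hatch: flake8 skips any line that ends with the marker, so these intentionally user-facing prints survive the new flake8-print check. A small, made-up illustration (the function name is hypothetical):

# Illustrative only: `# noqa` suppresses all flake8 checks on this line,
# including the print check contributed by flake8-print.
def show_provider_hint():
    print("Provider List: https://docs.litellm.ai/docs/providers")  # noqa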

@@ -2772,7 +2766,7 @@ def get_all_keys(llm_provider=None):
 
 
 def get_model_list():
-    global last_fetched_at
+    global last_fetched_at, print_verbose
     try:
         # if user is using hosted product -> get their updated model list
         user_email = (
|
||||||
if user_email:
|
if user_email:
|
||||||
# make the api call
|
# make the api call
|
||||||
last_fetched_at = time.time()
|
last_fetched_at = time.time()
|
||||||
print(f"last_fetched_at: {last_fetched_at}")
|
print_verbose(f"last_fetched_at: {last_fetched_at}")
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
url="http://api.litellm.ai/get_model_list",
|
url="http://api.litellm.ai/get_model_list",
|
||||||
headers={"content-type": "application/json"},
|
headers={"content-type": "application/json"},
|
||||||
|

@@ -2820,10 +2814,10 @@ def exception_type(
     global user_logger_fn, liteDebuggerClient
     exception_mapping_worked = False
     if litellm.suppress_debug_info is False:
-        print()
-        print("\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m")
-        print("LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.")
-        print()
+        print() # noqa
+        print("\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m") # noqa
+        print("LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.") # noqa
+        print() # noqa
     try:
         if isinstance(original_exception, OriginalError):
             # Handle the OpenAIError

@@ -3401,7 +3395,7 @@ def exception_type(
                     model=model
                 )
             elif hasattr(original_exception, "status_code"):
-                print(f"status code: {original_exception.status_code}")
+                print_verbose(f"status code: {original_exception.status_code}")
                 if original_exception.status_code == 401:
                     exception_mapping_worked = True
                     raise AuthenticationError(

@@ -4267,12 +4261,11 @@ def completion_with_fallbacks(**kwargs):
                 return response
 
             except Exception as e:
-                print(e)
+                print_verbose(e)
                 rate_limited_models.add(model)
                 model_expiration_times[model] = (
                     time.time() + 60
                 ) # cool down this selected model
-                # print(f"rate_limited_models {rate_limited_models}")
                 pass
     return response
 

@@ -4417,7 +4410,7 @@ def trim_messages(
 
         return final_messages
     except Exception as e: # [NON-Blocking, if error occurs just return final_messages
-        print("Got exception while token trimming", e)
+        print_verbose("Got exception while token trimming", e)
         return messages
 
 def get_valid_models():