diff --git a/litellm/__init__.py b/litellm/__init__.py
index e61ee1f59..532fe0701 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -5,7 +5,7 @@ input_callback: List[str] = []
 success_callback: List[str] = []
 failure_callback: List[str] = []
 set_verbose = False
-email = None # for hosted dashboard. Learn more - https://docs.litellm.ai/docs/debugging/hosted_debugging
+email = None  # for hosted dashboard. Learn more - https://docs.litellm.ai/docs/debugging/hosted_debugging
 telemetry = True
 max_tokens = 256  # OpenAI Defaults
 retry = True
@@ -261,7 +261,7 @@ from .utils import (
     get_litellm_params,
     Logging,
     acreate,
-    get_model_list
+    get_model_list,
 )
 from .main import *  # type: ignore
 from .integrations import *
diff --git a/litellm/integrations/litedebugger.py b/litellm/integrations/litedebugger.py
index 74b6ec4de..19b2b1777 100644
--- a/litellm/integrations/litedebugger.py
+++ b/litellm/integrations/litedebugger.py
@@ -4,6 +4,7 @@ import requests, traceback, json, os
 class LiteDebugger:
     user_email = None
     dashboard_url = None
+
     def __init__(self, email=None):
         self.api_url = "https://api.litellm.ai/debugger"
         self.validate_environment(email)
@@ -12,7 +13,7 @@ class LiteDebugger:
     def validate_environment(self, email):
         try:
             self.user_email = os.getenv("LITELLM_EMAIL") or email
-            self.dashboard_url = 'https://admin.litellm.ai/' + self.user_email
+            self.dashboard_url = "https://admin.litellm.ai/" + self.user_email
             print(f"Here's your free Dashboard 👉 {self.dashboard_url}")
             if self.user_email == None:
                 raise Exception(
diff --git a/litellm/main.py b/litellm/main.py
index e0240892d..3cd8a0729 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -17,7 +17,7 @@ from litellm.utils import (
     install_and_import,
     CustomStreamWrapper,
     read_config_args,
-    completion_with_fallbacks
+    completion_with_fallbacks,
 )
 from .llms.anthropic import AnthropicLLM
 from .llms.huggingface_restapi import HuggingfaceRestAPILLM
@@ -187,7 +187,7 @@ def completion(
             response = openai.ChatCompletion.create(
                 engine=model, messages=messages, **optional_params
             )
-            
+
             ## LOGGING
             logging.post_call(
                 input=messages,
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index e6f75b8dc..6a58088e3 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -11,6 +11,7 @@ sys.path.insert(
 import pytest
 import litellm
 from litellm import embedding, completion
+
 litellm.debugger = True
 
 # from infisical import InfisicalClient
@@ -349,13 +350,10 @@ def test_petals():
 
 
 def test_completion_with_fallbacks():
-    fallbacks = ['gpt-3.5-turb', 'gpt-3.5-turbo', 'command-nightly']
+    fallbacks = ["gpt-3.5-turb", "gpt-3.5-turbo", "command-nightly"]
     try:
         response = completion(
-            model='bad-model',
-            messages=messages,
-            force_timeout=120,
-            fallbacks=fallbacks
+            model="bad-model", messages=messages, force_timeout=120, fallbacks=fallbacks
         )
         # Add any assertions here to check the response
         print(response)
diff --git a/litellm/tests/test_get_model_list.py b/litellm/tests/test_get_model_list.py
index af91715e4..7663eebf5 100644
--- a/litellm/tests/test_get_model_list.py
+++ b/litellm/tests/test_get_model_list.py
@@ -8,4 +8,4 @@ from litellm import get_model_list
 
 print(get_model_list())
 print(get_model_list())
-# print(litellm.model_list)
\ No newline at end of file
+# print(litellm.model_list)
diff --git a/litellm/utils.py b/litellm/utils.py
index 40cf142ae..a979c5621 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -288,7 +288,9 @@ def client(original_function):
     ):  # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
         try:
             global callback_list, add_breadcrumb, user_logger_fn
-            if litellm.email or os.getenv("LITELLM_EMAIL", None) != None: # add to input, success and failure callbacks if user sets debugging to true
+            if (
+                litellm.debugger or os.getenv("LITELLM_EMAIL", None) != None
+            ):  # add to input, success and failure callbacks if user sets debugging to true
                 litellm.input_callback.append("lite_debugger")
                 litellm.success_callback.append("lite_debugger")
                 litellm.failure_callback.append("lite_debugger")
@@ -1020,35 +1022,44 @@ def handle_success(args, kwargs, result, start_time, end_time):
         )
         pass
 
+
 def get_model_list():
     global last_fetched_at
     # if user is using hosted product -> get their updated model list - refresh every 5 minutes
-    user_email = (os.getenv("LITELLM_EMAIL") or litellm.email)
+    user_email = os.getenv("LITELLM_EMAIL") or litellm.email
     if user_email:
         time_delta = 0
         if last_fetched_at != None:
-            current_time = time.time() 
+            current_time = time.time()
             time_delta = current_time - last_fetched_at
         if time_delta > 300 or last_fetched_at == None:
-            # make the api call 
+            # make the api call
             last_fetched_at = time.time()
             print(f"last_fetched_at: {last_fetched_at}")
-            response = requests.get(url="http://api.litellm.ai/get_model_list", headers={"content-type": "application/json"}, data=json.dumps({"user_email": user_email}))
+            response = requests.get(
+                url="http://api.litellm.ai/get_model_list",
+                headers={"content-type": "application/json"},
+                data=json.dumps({"user_email": user_email}),
+            )
             print_verbose(f"get_model_list response: {response.text}")
             data = response.json()
             # update model list
             model_list = data["model_list"]
-            # set environment variables 
+            # set environment variables
             env_dict = data["model_keys"]
             for key, value in env_dict.items():
                 os.environ[key] = value
-            litellm.model_list = model_list # update the user's current litellm model list
+            litellm.model_list = (
+                model_list  # update the user's current litellm model list
+            )
     # return litellm model list by default
     return litellm.model_list
 
-def acreate(*args, **kwargs): ## Thin client to handle the acreate langchain call
+
+def acreate(*args, **kwargs):  ## Thin client to handle the acreate langchain call
     return litellm.acompletion(*args, **kwargs)
 
+
 def prompt_token_calculator(model, messages):
     # use tiktoken or anthropic's tokenizer depending on the model
     text = " ".join(message["content"] for message in messages)
@@ -1063,6 +1074,7 @@ def prompt_token_calculator(model, messages):
     num_tokens = len(encoding.encode(text))
     return num_tokens
 
+
 def valid_model(model):
     try:
         # for a given model name, check if the user has the right permissions to access the model
@@ -1471,22 +1483,29 @@ def completion_with_fallbacks(**kwargs):
     rate_limited_models = set()
     model_expiration_times = {}
     start_time = time.time()
-    fallbacks = [kwargs['model']] + kwargs['fallbacks']
-    del kwargs['fallbacks'] # remove fallbacks so it's not recursive
+    fallbacks = [kwargs["model"]] + kwargs["fallbacks"]
+    del kwargs["fallbacks"]  # remove fallbacks so it's not recursive
 
     while response == None and time.time() - start_time < 45:
         for model in fallbacks:
-            # loop thru all models 
+            # loop thru all models
             try:
-                if model in rate_limited_models: # check if model is currently cooling down
-                    if model_expiration_times.get(model) and time.time() >= model_expiration_times[model]:
-                        rate_limited_models.remove(model) # check if it's been 60s of cool down and remove model
+                if (
+                    model in rate_limited_models
+                ):  # check if model is currently cooling down
+                    if (
+                        model_expiration_times.get(model)
+                        and time.time() >= model_expiration_times[model]
+                    ):
+                        rate_limited_models.remove(
+                            model
+                        )  # check if it's been 60s of cool down and remove model
                     else:
-                        continue # skip model
-            
+                        continue  # skip model
+
                 # delete model from kwargs if it exists
-                if kwargs.get('model'):
-                    del kwargs['model']
+                if kwargs.get("model"):
+                    del kwargs["model"]
 
                 print("making completion call", model)
                 response = litellm.completion(**kwargs, model=model)
@@ -1497,7 +1516,9 @@
             except Exception as e:
                 print(f"got exception {e} for model {model}")
                 rate_limited_models.add(model)
-                model_expiration_times[model] = time.time() + 60 # cool down this selected model
-                #print(f"rate_limited_models {rate_limited_models}")
+                model_expiration_times[model] = (
+                    time.time() + 60
+                )  # cool down this selected model
+                # print(f"rate_limited_models {rate_limited_models}")
                 pass
-    return response
\ No newline at end of file
+    return response
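
A note on the behavior change buried in the utils.py @@ -288 hunk: the guard now keys off litellm.debugger (previously litellm.email), so setting either that flag or the LITELLM_EMAIL environment variable registers "lite_debugger" in the input, success, and failure callback lists. A minimal opt-in sketch using only names visible in this diff; the model and message are illustrative:

import litellm
from litellm import completion

# Flipping this flag makes the client decorator append "lite_debugger" to
# litellm.input_callback, litellm.success_callback and litellm.failure_callback,
# the same thing test_completion.py now does at import time.
litellm.debugger = True

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],  # illustrative message
)
print(response)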
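For the reworked get_model_list(): it only calls http://api.litellm.ai/get_model_list when a user email is available (LITELLM_EMAIL or litellm.email), re-fetches at most every 300 seconds via the module-global last_fetched_at, exports the returned model_keys into os.environ, and caches the list on litellm.model_list. A usage sketch under those assumptions; the email address is a placeholder:

import os
from litellm import get_model_list

os.environ["LITELLM_EMAIL"] = "you@example.com"  # placeholder address

models = get_model_list()  # first call hits the hosted endpoint and caches
models = get_model_list()  # within 300s this returns litellm.model_list as-is
print(models)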
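And for completion_with_fallbacks(), which the reformatted test exercises through the fallbacks kwarg: the function prepends kwargs["model"] to the fallback list, deletes the fallbacks key so the inner completion() call doesn't recurse, then cycles through the candidates for up to 45 seconds, parking any model that raises in rate_limited_models for a 60-second cooldown. A call shaped like the test, with an illustrative messages list:

from litellm import completion

messages = [{"role": "user", "content": "Hey, how's it going?"}]  # illustrative

# "bad-model" fails, gets a 60s cooldown, and the loop falls through to the
# fallbacks until one of them returns within the overall 45s window.
response = completion(
    model="bad-model",
    messages=messages,
    force_timeout=120,
    fallbacks=["gpt-3.5-turb", "gpt-3.5-turbo", "command-nightly"],
)
print(response)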