linting fixes

ishaan-jaff 2023-08-22 15:17:14 -07:00
parent 950bfe4031
commit f3334a9634
6 changed files with 52 additions and 32 deletions

View file

@@ -5,7 +5,7 @@ input_callback: List[str] = []
success_callback: List[str] = []
failure_callback: List[str] = []
set_verbose = False
email = None # for hosted dashboard. Learn more - https://docs.litellm.ai/docs/debugging/hosted_debugging
email = None # for hosted dashboard. Learn more - https://docs.litellm.ai/docs/debugging/hosted_debugging
telemetry = True
max_tokens = 256 # OpenAI Defaults
retry = True
@@ -261,7 +261,7 @@ from .utils import (
get_litellm_params,
Logging,
acreate,
get_model_list
get_model_list,
)
from .main import * # type: ignore
from .integrations import *
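
For reference, a minimal sketch of how the module-level settings in this hunk are meant to be consumed by callers, using only the attributes visible above (set_verbose, email, telemetry, max_tokens, retry); the email value feeds the hosted dashboard linked in the comment:

import litellm

litellm.set_verbose = False            # keep local debug printing off
litellm.email = "you@example.com"      # hypothetical address; enables the hosted debugging dashboard
litellm.telemetry = True               # defaults as shown in the hunk above
litellm.max_tokens = 256               # OpenAI-style default
litellm.retry = True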

View file

@@ -4,6 +4,7 @@ import requests, traceback, json, os
class LiteDebugger:
user_email = None
dashboard_url = None
def __init__(self, email=None):
self.api_url = "https://api.litellm.ai/debugger"
self.validate_environment(email)
@@ -12,7 +13,7 @@ class LiteDebugger:
def validate_environment(self, email):
try:
self.user_email = os.getenv("LITELLM_EMAIL") or email
self.dashboard_url = 'https://admin.litellm.ai/' + self.user_email
self.dashboard_url = "https://admin.litellm.ai/" + self.user_email
print(f"Here's your free Dashboard 👉 {self.dashboard_url}")
if self.user_email == None:
raise Exception(

View file

@@ -17,7 +17,7 @@ from litellm.utils import (
install_and_import,
CustomStreamWrapper,
read_config_args,
completion_with_fallbacks
completion_with_fallbacks,
)
from .llms.anthropic import AnthropicLLM
from .llms.huggingface_restapi import HuggingfaceRestAPILLM
@@ -187,7 +187,7 @@ def completion(
response = openai.ChatCompletion.create(
engine=model, messages=messages, **optional_params
)
## LOGGING
logging.post_call(
input=messages,

View file

@@ -11,6 +11,7 @@ sys.path.insert(
import pytest
import litellm
from litellm import embedding, completion
litellm.debugger = True
# from infisical import InfisicalClient
@@ -349,13 +350,10 @@ def test_petals():
def test_completion_with_fallbacks():
fallbacks = ['gpt-3.5-turb', 'gpt-3.5-turbo', 'command-nightly']
fallbacks = ["gpt-3.5-turb", "gpt-3.5-turbo", "command-nightly"]
try:
response = completion(
model='bad-model',
messages=messages,
force_timeout=120,
fallbacks=fallbacks
model="bad-model", messages=messages, force_timeout=120, fallbacks=fallbacks
)
# Add any assertions here to check the response
print(response)
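
Setting litellm.debugger = True, as this test file now does near its imports, is what the client wrapper in utils.py (further down) checks before registering the lite_debugger input/success/failure callbacks; a rough usage sketch, with the model names taken from the test above and the message body invented for illustration:

import litellm
from litellm import completion

litellm.debugger = True   # flag checked by the client wrapper in utils.py
messages = [{"role": "user", "content": "Hello, how are you?"}]   # hypothetical message
response = completion(
    model="bad-model",   # intentionally invalid so the fallbacks kick in
    messages=messages,
    force_timeout=120,
    fallbacks=["gpt-3.5-turb", "gpt-3.5-turbo", "command-nightly"],
)
print(response)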

View file

@@ -8,4 +8,4 @@ from litellm import get_model_list
print(get_model_list())
print(get_model_list())
# print(litellm.model_list)
# print(litellm.model_list)
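
print(get_model_list()) only triggers a network refresh for hosted users; based on the utils.py implementation further down, a sketch of the expected behaviour (LITELLM_EMAIL or litellm.email gates the fetch, and results are cached for roughly five minutes):

import os
import litellm
from litellm import get_model_list

# with no email configured, the hosted fetch is skipped and the current
# litellm.model_list is returned unchanged
print(get_model_list())

# with an email set (hypothetical address below), the first call fetches an
# updated list from http://api.litellm.ai/get_model_list, exports any keys in
# "model_keys" into os.environ, and then refreshes at most every ~300 seconds
os.environ["LITELLM_EMAIL"] = "you@example.com"
print(get_model_list())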

View file

@@ -288,7 +288,9 @@ def client(original_function):
): # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
try:
global callback_list, add_breadcrumb, user_logger_fn
if litellm.email or os.getenv("LITELLM_EMAIL", None) != None: # add to input, success and failure callbacks if user sets debugging to true
if (
litellm.debugger or os.getenv("LITELLM_EMAIL", None) != None
): # add to input, success and failure callbacks if user sets debugging to true
litellm.input_callback.append("lite_debugger")
litellm.success_callback.append("lite_debugger")
litellm.failure_callback.append("lite_debugger")
@@ -1020,35 +1022,44 @@ def handle_success(args, kwargs, result, start_time, end_time):
)
pass
def get_model_list():
global last_fetched_at
# if user is using hosted product -> get their updated model list - refresh every 5 minutes
user_email = (os.getenv("LITELLM_EMAIL") or litellm.email)
user_email = os.getenv("LITELLM_EMAIL") or litellm.email
if user_email:
time_delta = 0
if last_fetched_at != None:
current_time = time.time()
current_time = time.time()
time_delta = current_time - last_fetched_at
if time_delta > 300 or last_fetched_at == None:
# make the api call
# make the api call
last_fetched_at = time.time()
print(f"last_fetched_at: {last_fetched_at}")
response = requests.get(url="http://api.litellm.ai/get_model_list", headers={"content-type": "application/json"}, data=json.dumps({"user_email": user_email}))
response = requests.get(
url="http://api.litellm.ai/get_model_list",
headers={"content-type": "application/json"},
data=json.dumps({"user_email": user_email}),
)
print_verbose(f"get_model_list response: {response.text}")
data = response.json()
# update model list
model_list = data["model_list"]
# set environment variables
# set environment variables
env_dict = data["model_keys"]
for key, value in env_dict.items():
os.environ[key] = value
litellm.model_list = model_list # update the user's current litellm model list
litellm.model_list = (
model_list # update the user's current litellm model list
)
# return litellm model list by default
return litellm.model_list
def acreate(*args, **kwargs): ## Thin client to handle the acreate langchain call
def acreate(*args, **kwargs): ## Thin client to handle the acreate langchain call
return litellm.acompletion(*args, **kwargs)
def prompt_token_calculator(model, messages):
# use tiktoken or anthropic's tokenizer depending on the model
text = " ".join(message["content"] for message in messages)
@@ -1063,6 +1074,7 @@ def prompt_token_calculator(model, messages):
num_tokens = len(encoding.encode(text))
return num_tokens
def valid_model(model):
try:
# for a given model name, check if the user has the right permissions to access the model
@@ -1471,22 +1483,29 @@ def completion_with_fallbacks(**kwargs):
rate_limited_models = set()
model_expiration_times = {}
start_time = time.time()
fallbacks = [kwargs['model']] + kwargs['fallbacks']
del kwargs['fallbacks'] # remove fallbacks so it's not recursive
fallbacks = [kwargs["model"]] + kwargs["fallbacks"]
del kwargs["fallbacks"] # remove fallbacks so it's not recursive
while response == None and time.time() - start_time < 45:
for model in fallbacks:
# loop thru all models
# loop thru all models
try:
if model in rate_limited_models: # check if model is currently cooling down
if model_expiration_times.get(model) and time.time() >= model_expiration_times[model]:
rate_limited_models.remove(model) # check if it's been 60s of cool down and remove model
if (
model in rate_limited_models
): # check if model is currently cooling down
if (
model_expiration_times.get(model)
and time.time() >= model_expiration_times[model]
):
rate_limited_models.remove(
model
) # check if it's been 60s of cool down and remove model
else:
continue # skip model
continue # skip model
# delete model from kwargs if it exists
if kwargs.get('model'):
del kwargs['model']
if kwargs.get("model"):
del kwargs["model"]
print("making completion call", model)
response = litellm.completion(**kwargs, model=model)
@@ -1497,7 +1516,9 @@
except Exception as e:
print(f"got exception {e} for model {model}")
rate_limited_models.add(model)
model_expiration_times[model] = time.time() + 60 # cool down this selected model
#print(f"rate_limited_models {rate_limited_models}")
model_expiration_times[model] = (
time.time() + 60
) # cool down this selected model
# print(f"rate_limited_models {rate_limited_models}")
pass
return response
return response
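
The retry loop above keeps cycling through the primary model plus its fallbacks for up to 45 seconds, cooling a failing model down for 60 seconds before it is tried again; a compressed sketch of that control flow, with the litellm.completion call abstracted behind a caller-supplied function so the timing logic stands on its own (function and parameter names here are illustrative, not part of the library):

import time

def fallback_loop(models, call, overall_timeout=45, cooldown=60):
    # mirrors completion_with_fallbacks above: rotate through candidate models,
    # skip any that are cooling down, stop on the first success or on timeout
    rate_limited, expirations = set(), {}
    start = time.time()
    response = None
    while response is None and time.time() - start < overall_timeout:
        for model in models:
            if model in rate_limited:
                if expirations.get(model) and time.time() >= expirations[model]:
                    rate_limited.remove(model)   # 60s cooldown has elapsed, try it again
                else:
                    continue                     # still cooling down, skip this model
            try:
                response = call(model)           # stand-in for litellm.completion(**kwargs, model=model)
                if response is not None:
                    return response
            except Exception:
                rate_limited.add(model)
                expirations[model] = time.time() + cooldown
    return response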