mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-24 18:24:20 +00:00

linting fixes

This commit is contained in:
parent 950bfe4031
commit f3334a9634

6 changed files with 52 additions and 32 deletions
@@ -5,7 +5,7 @@ input_callback: List[str] = []
 success_callback: List[str] = []
 failure_callback: List[str] = []
 set_verbose = False
-email = None # for hosted dashboard. Learn more - https://docs.litellm.ai/docs/debugging/hosted_debugging
+email = None  # for hosted dashboard. Learn more - https://docs.litellm.ai/docs/debugging/hosted_debugging
 telemetry = True
 max_tokens = 256 # OpenAI Defaults
 retry = True
@@ -261,7 +261,7 @@ from .utils import (
     get_litellm_params,
     Logging,
     acreate,
-    get_model_list
+    get_model_list,
 )
 from .main import *  # type: ignore
 from .integrations import *
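Note on the two hunks above: they touch litellm's module-level configuration and its re-exports, and the only code change is the trailing comma after get_model_list, which keeps future import additions to single-line diffs. For context, these module flags are what callers toggle before issuing requests; a minimal sketch, with illustrative values (the "posthog"/"sentry" callback names are examples, not part of this diff):

    import litellm

    # Module-level switches from the first hunk; values here are examples.
    litellm.set_verbose = True               # print debug output
    litellm.success_callback = ["posthog"]   # run on successful completions
    litellm.failure_callback = ["sentry"]    # run on failed completions
    litellm.max_tokens = 256                 # OpenAI default completion budget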
@@ -4,6 +4,7 @@ import requests, traceback, json, os
 class LiteDebugger:
     user_email = None
     dashboard_url = None
+
     def __init__(self, email=None):
         self.api_url = "https://api.litellm.ai/debugger"
         self.validate_environment(email)
@@ -12,7 +13,7 @@ class LiteDebugger:
     def validate_environment(self, email):
         try:
             self.user_email = os.getenv("LITELLM_EMAIL") or email
-            self.dashboard_url = 'https://admin.litellm.ai/' + self.user_email
+            self.dashboard_url = "https://admin.litellm.ai/" + self.user_email
             print(f"Here's your free Dashboard 👉 {self.dashboard_url}")
             if self.user_email == None:
                 raise Exception(
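The quote change above is cosmetic, but the resolution order around it is worth noting: the LITELLM_EMAIL environment variable takes precedence over the constructor argument. A minimal sketch of that resolution (the address is a placeholder):

    import os

    # Mirrors validate_environment above: env var wins over the argument.
    email_arg = "me@example.com"  # placeholder constructor argument
    user_email = os.getenv("LITELLM_EMAIL") or email_arg
    dashboard_url = "https://admin.litellm.ai/" + user_email
    print(f"Here's your free Dashboard 👉 {dashboard_url}")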
@@ -17,7 +17,7 @@ from litellm.utils import (
     install_and_import,
     CustomStreamWrapper,
     read_config_args,
-    completion_with_fallbacks
+    completion_with_fallbacks,
 )
 from .llms.anthropic import AnthropicLLM
 from .llms.huggingface_restapi import HuggingfaceRestAPILLM
@@ -187,7 +187,7 @@ def completion(
             response = openai.ChatCompletion.create(
                 engine=model, messages=messages, **optional_params
             )
-
+
             ## LOGGING
             logging.post_call(
                 input=messages,
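For context on the call site above: engine= (rather than model=) is the pre-1.0 openai SDK's Azure convention, where the value names a deployment rather than a model. A sketch of that path, with placeholder endpoint, key, and deployment name:

    import openai

    # Pre-1.0 openai SDK, Azure-style call; all values below are placeholders.
    openai.api_type = "azure"
    openai.api_base = "https://my-resource.openai.azure.com"
    openai.api_version = "2023-05-15"
    openai.api_key = "AZURE_KEY_HERE"
    response = openai.ChatCompletion.create(
        engine="my-deployment",  # Azure deployment name, not a model name
        messages=[{"role": "user", "content": "hello"}],
    )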
@@ -11,6 +11,7 @@ sys.path.insert(
 import pytest
 import litellm
 from litellm import embedding, completion

+litellm.debugger = True

 # from infisical import InfisicalClient
@@ -349,13 +350,10 @@ def test_petals():


 def test_completion_with_fallbacks():
-    fallbacks = ['gpt-3.5-turb', 'gpt-3.5-turbo', 'command-nightly']
+    fallbacks = ["gpt-3.5-turb", "gpt-3.5-turbo", "command-nightly"]
     try:
         response = completion(
-            model='bad-model',
-            messages=messages,
-            force_timeout=120,
-            fallbacks=fallbacks
+            model="bad-model", messages=messages, force_timeout=120, fallbacks=fallbacks
         )
         # Add any assertions here to check the response
         print(response)
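The reformatted test reads the same either way; what it exercises is the recovery path: "bad-model" and the deliberately misspelled "gpt-3.5-turb" both fail, so completion() should land on "gpt-3.5-turbo". An equivalent standalone call (the prompt is a sample; the test reuses a module-level messages list, and provider keys must be in the environment):

    from litellm import completion

    messages = [{"role": "user", "content": "Hey, how's it going?"}]  # sample prompt
    response = completion(
        model="bad-model",  # intentionally invalid primary model
        messages=messages,
        force_timeout=120,
        fallbacks=["gpt-3.5-turb", "gpt-3.5-turbo", "command-nightly"],
    )
    print(response)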
@@ -8,4 +8,4 @@ from litellm import get_model_list

-print(get_model_list())
-# print(litellm.model_list)
+print(get_model_list())
+# print(litellm.model_list)

@@ -288,7 +288,9 @@ def client(original_function):
        ): # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
            try:
                global callback_list, add_breadcrumb, user_logger_fn
-               if litellm.email or os.getenv("LITELLM_EMAIL", None) != None: # add to input, success and failure callbacks if user sets debugging to true
+               if (
+                   litellm.debugger or os.getenv("LITELLM_EMAIL", None) != None
+               ):  # add to input, success and failure callbacks if user sets debugging to true
                    litellm.input_callback.append("lite_debugger")
                    litellm.success_callback.append("lite_debugger")
                    litellm.failure_callback.append("lite_debugger")
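Note that this hunk is not purely formatting: the gate switches from litellm.email to litellm.debugger, so the lite_debugger callbacks now attach when the debugger flag is set or LITELLM_EMAIL is present. A sketch of how either side enables it (address is a placeholder):

    import os
    import litellm

    # Either line routes input/success/failure events to lite_debugger.
    litellm.debugger = True
    # os.environ["LITELLM_EMAIL"] = "me@example.com"  # placeholder address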
@@ -1020,35 +1022,44 @@ def handle_success(args, kwargs, result, start_time, end_time):
         )
         pass


 def get_model_list():
     global last_fetched_at
     # if user is using hosted product -> get their updated model list - refresh every 5 minutes
-    user_email = (os.getenv("LITELLM_EMAIL") or litellm.email)
+    user_email = os.getenv("LITELLM_EMAIL") or litellm.email
     if user_email:
         time_delta = 0
         if last_fetched_at != None:
-            current_time = time.time()
+            current_time = time.time()
             time_delta = current_time - last_fetched_at
         if time_delta > 300 or last_fetched_at == None:
-            # make the api call
+            # make the api call
             last_fetched_at = time.time()
             print(f"last_fetched_at: {last_fetched_at}")
-            response = requests.get(url="http://api.litellm.ai/get_model_list", headers={"content-type": "application/json"}, data=json.dumps({"user_email": user_email}))
+            response = requests.get(
+                url="http://api.litellm.ai/get_model_list",
+                headers={"content-type": "application/json"},
+                data=json.dumps({"user_email": user_email}),
+            )
             print_verbose(f"get_model_list response: {response.text}")
             data = response.json()
             # update model list
             model_list = data["model_list"]
-            # set environment variables
+            # set environment variables
             env_dict = data["model_keys"]
             for key, value in env_dict.items():
                 os.environ[key] = value
-            litellm.model_list = model_list # update the user's current litellm model list
+            litellm.model_list = (
+                model_list  # update the user's current litellm model list
+            )
     # return litellm model list by default
     return litellm.model_list

-def acreate(*args, **kwargs): ## Thin client to handle the acreate langchain call
+
+def acreate(*args, **kwargs):  ## Thin client to handle the acreate langchain call
     return litellm.acompletion(*args, **kwargs)
+
+
 def prompt_token_calculator(model, messages):
     # use tiktoken or anthropic's tokenizer depending on the model
     text = " ".join(message["content"] for message in messages)
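Behavior of the reformatted get_model_list, for reviewers: hosted users (LITELLM_EMAIL set) get a refresh from api.litellm.ai at most every 300 seconds, and the response's model_keys are exported into os.environ as a side effect; everyone else gets the cached litellm.model_list. Usage sketch (placeholder address):

    import os
    from litellm import get_model_list

    os.environ["LITELLM_EMAIL"] = "me@example.com"  # placeholder; enables refresh
    model_list = get_model_list()  # also exports returned model keys into os.environ
    print(model_list)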
@@ -1063,6 +1074,7 @@ def prompt_token_calculator(model, messages):
         num_tokens = len(encoding.encode(text))
     return num_tokens

+
 def valid_model(model):
     try:
         # for a given model name, check if the user has the right permissions to access the model
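The hunk above only inserts a blank line, but for context, the counting it sits in works roughly like this; a sketch using tiktoken's cl100k_base encoder (the encoder choice is an assumption here, since prompt_token_calculator picks a tokenizer per model):

    import tiktoken

    messages = [{"role": "user", "content": "Hey, how's it going?"}]
    text = " ".join(message["content"] for message in messages)
    encoding = tiktoken.get_encoding("cl100k_base")  # assumed encoder for the sketch
    num_tokens = len(encoding.encode(text))
    print(num_tokens)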
@@ -1471,22 +1483,29 @@ def completion_with_fallbacks(**kwargs):
     rate_limited_models = set()
     model_expiration_times = {}
     start_time = time.time()
-    fallbacks = [kwargs['model']] + kwargs['fallbacks']
-    del kwargs['fallbacks'] # remove fallbacks so it's not recursive
+    fallbacks = [kwargs["model"]] + kwargs["fallbacks"]
+    del kwargs["fallbacks"]  # remove fallbacks so it's not recursive

     while response == None and time.time() - start_time < 45:
         for model in fallbacks:
-            # loop thru all models
+            # loop thru all models
             try:
-                if model in rate_limited_models: # check if model is currently cooling down
-                    if model_expiration_times.get(model) and time.time() >= model_expiration_times[model]:
-                        rate_limited_models.remove(model) # check if it's been 60s of cool down and remove model
+                if (
+                    model in rate_limited_models
+                ):  # check if model is currently cooling down
+                    if (
+                        model_expiration_times.get(model)
+                        and time.time() >= model_expiration_times[model]
+                    ):
+                        rate_limited_models.remove(
+                            model
+                        )  # check if it's been 60s of cool down and remove model
                     else:
-                        continue # skip model
+                        continue  # skip model

                 # delete model from kwargs if it exists
-                if kwargs.get('model'):
-                    del kwargs['model']
+                if kwargs.get("model"):
+                    del kwargs["model"]

                 print("making completion call", model)
                 response = litellm.completion(**kwargs, model=model)
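The reformatted block implements a per-model cooldown inside a 45-second overall deadline: a model that raised is benched for 60 seconds and skipped until its expiry passes. A self-contained sketch of that pattern (names mirror the diff; the two helper functions are illustrative, not part of the code):

    import time

    rate_limited_models = set()
    model_expiration_times = {}

    def is_available(model):
        # Skip a model while it is cooling down; un-bench it once expired.
        if model in rate_limited_models:
            expiry = model_expiration_times.get(model)
            if expiry and time.time() >= expiry:
                rate_limited_models.remove(model)  # 60s elapsed, try it again
            else:
                return False
        return True

    def bench(model):
        # Called when a model raises: cool it down for 60 seconds.
        rate_limited_models.add(model)
        model_expiration_times[model] = time.time() + 60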
@@ -1497,7 +1516,9 @@ def completion_with_fallbacks(**kwargs):
             except Exception as e:
                 print(f"got exception {e} for model {model}")
                 rate_limited_models.add(model)
-                model_expiration_times[model] = time.time() + 60 # cool down this selected model
-                #print(f"rate_limited_models {rate_limited_models}")
+                model_expiration_times[model] = (
+                    time.time() + 60
+                )  # cool down this selected model
+                # print(f"rate_limited_models {rate_limited_models}")
                 pass
-    return response
+    return response