forked from phoenix/litellm-mirror
linting fixes
This commit is contained in:
parent
950bfe4031
commit
f3334a9634
6 changed files with 52 additions and 32 deletions
|
@ -5,7 +5,7 @@ input_callback: List[str] = []
|
||||||
success_callback: List[str] = []
|
success_callback: List[str] = []
|
||||||
failure_callback: List[str] = []
|
failure_callback: List[str] = []
|
||||||
set_verbose = False
|
set_verbose = False
|
||||||
email = None # for hosted dashboard. Learn more - https://docs.litellm.ai/docs/debugging/hosted_debugging
|
email = None # for hosted dashboard. Learn more - https://docs.litellm.ai/docs/debugging/hosted_debugging
|
||||||
telemetry = True
|
telemetry = True
|
||||||
max_tokens = 256 # OpenAI Defaults
|
max_tokens = 256 # OpenAI Defaults
|
||||||
retry = True
|
retry = True
|
||||||
|
@ -261,7 +261,7 @@ from .utils import (
|
||||||
get_litellm_params,
|
get_litellm_params,
|
||||||
Logging,
|
Logging,
|
||||||
acreate,
|
acreate,
|
||||||
get_model_list
|
get_model_list,
|
||||||
)
|
)
|
||||||
from .main import * # type: ignore
|
from .main import * # type: ignore
|
||||||
from .integrations import *
|
from .integrations import *
|
||||||
|
|
|
@ -4,6 +4,7 @@ import requests, traceback, json, os
|
||||||
class LiteDebugger:
|
class LiteDebugger:
|
||||||
user_email = None
|
user_email = None
|
||||||
dashboard_url = None
|
dashboard_url = None
|
||||||
|
|
||||||
def __init__(self, email=None):
|
def __init__(self, email=None):
|
||||||
self.api_url = "https://api.litellm.ai/debugger"
|
self.api_url = "https://api.litellm.ai/debugger"
|
||||||
self.validate_environment(email)
|
self.validate_environment(email)
|
||||||
|
@ -12,7 +13,7 @@ class LiteDebugger:
|
||||||
def validate_environment(self, email):
|
def validate_environment(self, email):
|
||||||
try:
|
try:
|
||||||
self.user_email = os.getenv("LITELLM_EMAIL") or email
|
self.user_email = os.getenv("LITELLM_EMAIL") or email
|
||||||
self.dashboard_url = 'https://admin.litellm.ai/' + self.user_email
|
self.dashboard_url = "https://admin.litellm.ai/" + self.user_email
|
||||||
print(f"Here's your free Dashboard 👉 {self.dashboard_url}")
|
print(f"Here's your free Dashboard 👉 {self.dashboard_url}")
|
||||||
if self.user_email == None:
|
if self.user_email == None:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
|
|
|
@ -17,7 +17,7 @@ from litellm.utils import (
|
||||||
install_and_import,
|
install_and_import,
|
||||||
CustomStreamWrapper,
|
CustomStreamWrapper,
|
||||||
read_config_args,
|
read_config_args,
|
||||||
completion_with_fallbacks
|
completion_with_fallbacks,
|
||||||
)
|
)
|
||||||
from .llms.anthropic import AnthropicLLM
|
from .llms.anthropic import AnthropicLLM
|
||||||
from .llms.huggingface_restapi import HuggingfaceRestAPILLM
|
from .llms.huggingface_restapi import HuggingfaceRestAPILLM
|
||||||
|
@ -187,7 +187,7 @@ def completion(
|
||||||
response = openai.ChatCompletion.create(
|
response = openai.ChatCompletion.create(
|
||||||
engine=model, messages=messages, **optional_params
|
engine=model, messages=messages, **optional_params
|
||||||
)
|
)
|
||||||
|
|
||||||
## LOGGING
|
## LOGGING
|
||||||
logging.post_call(
|
logging.post_call(
|
||||||
input=messages,
|
input=messages,
|
||||||
|
|
|
@ -11,6 +11,7 @@ sys.path.insert(
|
||||||
import pytest
|
import pytest
|
||||||
import litellm
|
import litellm
|
||||||
from litellm import embedding, completion
|
from litellm import embedding, completion
|
||||||
|
|
||||||
litellm.debugger = True
|
litellm.debugger = True
|
||||||
|
|
||||||
# from infisical import InfisicalClient
|
# from infisical import InfisicalClient
|
||||||
|
@ -349,13 +350,10 @@ def test_petals():
|
||||||
|
|
||||||
|
|
||||||
def test_completion_with_fallbacks():
|
def test_completion_with_fallbacks():
|
||||||
fallbacks = ['gpt-3.5-turb', 'gpt-3.5-turbo', 'command-nightly']
|
fallbacks = ["gpt-3.5-turb", "gpt-3.5-turbo", "command-nightly"]
|
||||||
try:
|
try:
|
||||||
response = completion(
|
response = completion(
|
||||||
model='bad-model',
|
model="bad-model", messages=messages, force_timeout=120, fallbacks=fallbacks
|
||||||
messages=messages,
|
|
||||||
force_timeout=120,
|
|
||||||
fallbacks=fallbacks
|
|
||||||
)
|
)
|
||||||
# Add any assertions here to check the response
|
# Add any assertions here to check the response
|
||||||
print(response)
|
print(response)
|
||||||
|
|
|
@ -8,4 +8,4 @@ from litellm import get_model_list
|
||||||
|
|
||||||
print(get_model_list())
|
print(get_model_list())
|
||||||
print(get_model_list())
|
print(get_model_list())
|
||||||
# print(litellm.model_list)
|
# print(litellm.model_list)
|
||||||
|
|
|
@ -288,7 +288,9 @@ def client(original_function):
|
||||||
): # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
|
): # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
|
||||||
try:
|
try:
|
||||||
global callback_list, add_breadcrumb, user_logger_fn
|
global callback_list, add_breadcrumb, user_logger_fn
|
||||||
if litellm.email or os.getenv("LITELLM_EMAIL", None) != None: # add to input, success and failure callbacks if user sets debugging to true
|
if (
|
||||||
|
litellm.debugger or os.getenv("LITELLM_EMAIL", None) != None
|
||||||
|
): # add to input, success and failure callbacks if user sets debugging to true
|
||||||
litellm.input_callback.append("lite_debugger")
|
litellm.input_callback.append("lite_debugger")
|
||||||
litellm.success_callback.append("lite_debugger")
|
litellm.success_callback.append("lite_debugger")
|
||||||
litellm.failure_callback.append("lite_debugger")
|
litellm.failure_callback.append("lite_debugger")
|
||||||
|
@ -1020,35 +1022,44 @@ def handle_success(args, kwargs, result, start_time, end_time):
|
||||||
)
|
)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def get_model_list():
|
def get_model_list():
|
||||||
global last_fetched_at
|
global last_fetched_at
|
||||||
# if user is using hosted product -> get their updated model list - refresh every 5 minutes
|
# if user is using hosted product -> get their updated model list - refresh every 5 minutes
|
||||||
user_email = (os.getenv("LITELLM_EMAIL") or litellm.email)
|
user_email = os.getenv("LITELLM_EMAIL") or litellm.email
|
||||||
if user_email:
|
if user_email:
|
||||||
time_delta = 0
|
time_delta = 0
|
||||||
if last_fetched_at != None:
|
if last_fetched_at != None:
|
||||||
current_time = time.time()
|
current_time = time.time()
|
||||||
time_delta = current_time - last_fetched_at
|
time_delta = current_time - last_fetched_at
|
||||||
if time_delta > 300 or last_fetched_at == None:
|
if time_delta > 300 or last_fetched_at == None:
|
||||||
# make the api call
|
# make the api call
|
||||||
last_fetched_at = time.time()
|
last_fetched_at = time.time()
|
||||||
print(f"last_fetched_at: {last_fetched_at}")
|
print(f"last_fetched_at: {last_fetched_at}")
|
||||||
response = requests.get(url="http://api.litellm.ai/get_model_list", headers={"content-type": "application/json"}, data=json.dumps({"user_email": user_email}))
|
response = requests.get(
|
||||||
|
url="http://api.litellm.ai/get_model_list",
|
||||||
|
headers={"content-type": "application/json"},
|
||||||
|
data=json.dumps({"user_email": user_email}),
|
||||||
|
)
|
||||||
print_verbose(f"get_model_list response: {response.text}")
|
print_verbose(f"get_model_list response: {response.text}")
|
||||||
data = response.json()
|
data = response.json()
|
||||||
# update model list
|
# update model list
|
||||||
model_list = data["model_list"]
|
model_list = data["model_list"]
|
||||||
# set environment variables
|
# set environment variables
|
||||||
env_dict = data["model_keys"]
|
env_dict = data["model_keys"]
|
||||||
for key, value in env_dict.items():
|
for key, value in env_dict.items():
|
||||||
os.environ[key] = value
|
os.environ[key] = value
|
||||||
litellm.model_list = model_list # update the user's current litellm model list
|
litellm.model_list = (
|
||||||
|
model_list # update the user's current litellm model list
|
||||||
|
)
|
||||||
# return litellm model list by default
|
# return litellm model list by default
|
||||||
return litellm.model_list
|
return litellm.model_list
|
||||||
|
|
||||||
def acreate(*args, **kwargs): ## Thin client to handle the acreate langchain call
|
|
||||||
|
def acreate(*args, **kwargs): ## Thin client to handle the acreate langchain call
|
||||||
return litellm.acompletion(*args, **kwargs)
|
return litellm.acompletion(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
def prompt_token_calculator(model, messages):
|
def prompt_token_calculator(model, messages):
|
||||||
# use tiktoken or anthropic's tokenizer depending on the model
|
# use tiktoken or anthropic's tokenizer depending on the model
|
||||||
text = " ".join(message["content"] for message in messages)
|
text = " ".join(message["content"] for message in messages)
|
||||||
|
@ -1063,6 +1074,7 @@ def prompt_token_calculator(model, messages):
|
||||||
num_tokens = len(encoding.encode(text))
|
num_tokens = len(encoding.encode(text))
|
||||||
return num_tokens
|
return num_tokens
|
||||||
|
|
||||||
|
|
||||||
def valid_model(model):
|
def valid_model(model):
|
||||||
try:
|
try:
|
||||||
# for a given model name, check if the user has the right permissions to access the model
|
# for a given model name, check if the user has the right permissions to access the model
|
||||||
|
@ -1471,22 +1483,29 @@ def completion_with_fallbacks(**kwargs):
|
||||||
rate_limited_models = set()
|
rate_limited_models = set()
|
||||||
model_expiration_times = {}
|
model_expiration_times = {}
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
fallbacks = [kwargs['model']] + kwargs['fallbacks']
|
fallbacks = [kwargs["model"]] + kwargs["fallbacks"]
|
||||||
del kwargs['fallbacks'] # remove fallbacks so it's not recursive
|
del kwargs["fallbacks"] # remove fallbacks so it's not recursive
|
||||||
|
|
||||||
while response == None and time.time() - start_time < 45:
|
while response == None and time.time() - start_time < 45:
|
||||||
for model in fallbacks:
|
for model in fallbacks:
|
||||||
# loop thru all models
|
# loop thru all models
|
||||||
try:
|
try:
|
||||||
if model in rate_limited_models: # check if model is currently cooling down
|
if (
|
||||||
if model_expiration_times.get(model) and time.time() >= model_expiration_times[model]:
|
model in rate_limited_models
|
||||||
rate_limited_models.remove(model) # check if it's been 60s of cool down and remove model
|
): # check if model is currently cooling down
|
||||||
|
if (
|
||||||
|
model_expiration_times.get(model)
|
||||||
|
and time.time() >= model_expiration_times[model]
|
||||||
|
):
|
||||||
|
rate_limited_models.remove(
|
||||||
|
model
|
||||||
|
) # check if it's been 60s of cool down and remove model
|
||||||
else:
|
else:
|
||||||
continue # skip model
|
continue # skip model
|
||||||
|
|
||||||
# delete model from kwargs if it exists
|
# delete model from kwargs if it exists
|
||||||
if kwargs.get('model'):
|
if kwargs.get("model"):
|
||||||
del kwargs['model']
|
del kwargs["model"]
|
||||||
|
|
||||||
print("making completion call", model)
|
print("making completion call", model)
|
||||||
response = litellm.completion(**kwargs, model=model)
|
response = litellm.completion(**kwargs, model=model)
|
||||||
|
@ -1497,7 +1516,9 @@ def completion_with_fallbacks(**kwargs):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"got exception {e} for model {model}")
|
print(f"got exception {e} for model {model}")
|
||||||
rate_limited_models.add(model)
|
rate_limited_models.add(model)
|
||||||
model_expiration_times[model] = time.time() + 60 # cool down this selected model
|
model_expiration_times[model] = (
|
||||||
#print(f"rate_limited_models {rate_limited_models}")
|
time.time() + 60
|
||||||
|
) # cool down this selected model
|
||||||
|
# print(f"rate_limited_models {rate_limited_models}")
|
||||||
pass
|
pass
|
||||||
return response
|
return response
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue