forked from phoenix/litellm-mirror
fixing exception mapping
parent 9b0e9bf57c
commit 92a13958ce
8 changed files with 188 additions and 115 deletions
@@ -69,6 +69,6 @@ open_ai_embedding_models = [
     'text-embedding-ada-002'
 ]
 from .timeout import timeout
-from .utils import client, logging, exception_type # Import all the symbols from main.py
+from .utils import client, logging, exception_type, get_optional_params # Import all the symbols from main.py
 from .main import * # Import all the symbols from main.py
 from .integrations import *
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -6,7 +6,7 @@ from functools import partial
 import dotenv
 import traceback
 import litellm
-from litellm import client, logging, exception_type, timeout
+from litellm import client, logging, exception_type, timeout, get_optional_params
 import random
 import asyncio
 from tenacity import (
@@ -20,51 +20,6 @@ dotenv.load_dotenv() # Loading env variables using dotenv
 # TODO move this to utils.py
 # TODO add translations
 # TODO see if this worked - model_name == krrish
-def get_optional_params(
-    # 12 optional params
-    functions = [],
-    function_call = "",
-    temperature = 1,
-    top_p = 1,
-    n = 1,
-    stream = False,
-    stop = None,
-    max_tokens = float('inf'),
-    presence_penalty = 0,
-    frequency_penalty = 0,
-    logit_bias = {},
-    user = "",
-    deployment_id = None
-):
-    optional_params = {}
-    if functions != []:
-        optional_params["functions"] = functions
-    if function_call != "":
-        optional_params["function_call"] = function_call
-    if temperature != 1:
-        optional_params["temperature"] = temperature
-    if top_p != 1:
-        optional_params["top_p"] = top_p
-    if n != 1:
-        optional_params["n"] = n
-    if stream:
-        optional_params["stream"] = stream
-    if stop != None:
-        optional_params["stop"] = stop
-    if max_tokens != float('inf'):
-        optional_params["max_tokens"] = max_tokens
-    if presence_penalty != 0:
-        optional_params["presence_penalty"] = presence_penalty
-    if frequency_penalty != 0:
-        optional_params["frequency_penalty"] = frequency_penalty
-    if logit_bias != {}:
-        optional_params["logit_bias"] = logit_bias
-    if user != "":
-        optional_params["user"] = user
-    if deployment_id != None:
-        optional_params["deployment_id"] = user
-    return optional_params
-
 ####### COMPLETION ENDPOINTS ################
 #############################################
 async def acompletion(*args, **kwargs):
@@ -285,12 +240,13 @@ def completion(
             }
             response = new_response
         else:
+            ## LOGGING
             logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
             args = locals()
             raise ValueError(f"No valid completion model args passed in - {args}")
         return response
     except Exception as e:
-        # log the original exception
+        ## LOGGING
         logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e)
         ## Map to OpenAI Exception
         raise exception_type(model=model, original_exception=e)
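The net effect of the completion() change above is that a provider failure is first logged and then re-raised through exception_type(), so callers see one family of exception classes regardless of which backend failed. A minimal caller-side sketch of what that enables (my example; the exception classes are assumed to come from the pre-1.0 openai package, since their import is not shown in this diff):

from litellm import completion
# Assumption: these are the OpenAI error classes this diff maps to; the exact
# import path is not shown in the commit.
from openai.error import AuthenticationError, RateLimitError, OpenAIError

try:
    response = completion(model="command-nightly",
                          messages=[{"role": "user", "content": "Hi"}])
except AuthenticationError:
    pass  # bad or missing provider key, whichever provider raised it
except RateLimitError:
    pass  # throttled / too-many-tokens cases per the provider-specific mappings
except OpenAIError:
    pass  # any other mapped (or passed-through) OpenAI-style error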
@@ -8,6 +8,7 @@ from litellm import embedding, completion
 from concurrent.futures import ThreadPoolExecutor
 import pytest

+litellm.failure_callback = ["sentry"]
 # litellm.set_verbose = True
 #### What this tests ####
 # This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type
@@ -22,11 +23,16 @@ import pytest
 # models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly"]
 models = ["command-nightly"]
 def logging_fn(model_call_dict):
-    print(f"model_call_dict: {model_call_dict['model']}")
+    if "model" in model_call_dict:
+        print(f"model_call_dict: {model_call_dict['model']}")
+    else:
+        print(f"model_call_dict: {model_call_dict}")


 # Test 1: Context Window Errors
 @pytest.mark.parametrize("model", models)
 def test_context_window(model):
-    sample_text = "how does a court case get to the Supreme Court?" * 100000
+    sample_text = "how does a court case get to the Supreme Court?" * 5000
     messages = [{"content": sample_text, "role": "user"}]
     try:
         azure = model == "chatgpt-test"
@@ -41,44 +47,61 @@ def test_context_window(model):
         return
     except Exception as e:
         print("Uncaught Error in test_context_window")
-        # print(f"Error Type: {type(e).__name__}")
+        print(f"Error Type: {type(e).__name__}")
         print(f"Uncaught Exception - {e}")
         pytest.fail(f"Error occurred: {e}")
     return
 test_context_window("command-nightly")
-# # Test 2: InvalidAuth Errors
-# def logger_fn(model_call_object: dict):
-#     print(f"model call details: {model_call_object}")
-
-# @pytest.mark.parametrize("model", models)
-# def invalid_auth(model): # set the model key to an invalid key, depending on the model
-#     messages = [{ "content": "Hello, how are you?","role": "user"}]
-#     try:
-#         azure = False
-#         if model == "gpt-3.5-turbo":
-#             os.environ["OPENAI_API_KEY"] = "bad-key"
-#         elif model == "chatgpt-test":
-#             os.environ["AZURE_API_KEY"] = "bad-key"
-#             azure = True
-#         elif model == "claude-instant-1":
-#             os.environ["ANTHROPIC_API_KEY"] = "bad-key"
-#         elif model == "command-nightly":
-#             os.environ["COHERE_API_KEY"] = "bad-key"
-#         elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
-#             os.environ["REPLICATE_API_KEY"] = "bad-key"
-#             os.environ["REPLICATE_API_TOKEN"] = "bad-key"
-#         print(f"model: {model}")
-#         response = completion(model=model, messages=messages, azure=azure)
-#         print(f"response: {response}")
-#     except AuthenticationError as e:
-#         return
-#     except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
-#         return
-#     except Exception as e:
-#         print(f"Uncaught Exception - {e}")
-#         pytest.fail(f"Error occurred: {e}")
-#     return
-
+# Test 2: InvalidAuth Errors
+@pytest.mark.parametrize("model", models)
+def invalid_auth(model): # set the model key to an invalid key, depending on the model
+    messages = [{ "content": "Hello, how are you?","role": "user"}]
+    temporary_key = None
+    try:
+        azure = False
+        if model == "gpt-3.5-turbo":
+            temporary_key = os.environ["OPENAI_API_KEY"]
+            os.environ["OPENAI_API_KEY"] = "bad-key"
+        elif model == "chatgpt-test":
+            temporary_key = os.environ["AZURE_API_KEY"]
+            os.environ["AZURE_API_KEY"] = "bad-key"
+            azure = True
+        elif model == "claude-instant-1":
+            temporary_key = os.environ["ANTHROPIC_API_KEY"]
+            os.environ["ANTHROPIC_API_KEY"] = "bad-key"
+        elif model == "command-nightly":
+            temporary_key = os.environ["COHERE_API_KEY"]
+            os.environ["COHERE_API_KEY"] = "bad-key"
+        elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
+            temporary_key = os.environ["REPLICATE_API_KEY"]
+            os.environ["REPLICATE_API_KEY"] = "bad-key"
+        print(f"model: {model}")
+        response = completion(model=model, messages=messages, azure=azure)
+        print(f"response: {response}")
+    except AuthenticationError as e:
+        print(f"AuthenticationError Caught Exception - {e}")
+    except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
+        print(f"OpenAIError Caught Exception - {e}")
+    except Exception as e:
+        print(type(e))
+        print(e.__class__.__name__)
+        print(f"Uncaught Exception - {e}")
+        pytest.fail(f"Error occurred: {e}")
+    if temporary_key != None: # reset the key
+        if model == "gpt-3.5-turbo":
+            os.environ["OPENAI_API_KEY"] = temporary_key
+        elif model == "chatgpt-test":
+            os.environ["AZURE_API_KEY"] = temporary_key
+            azure = True
+        elif model == "claude-instant-1":
+            os.environ["ANTHROPIC_API_KEY"] = temporary_key
+        elif model == "command-nightly":
+            os.environ["COHERE_API_KEY"] = temporary_key
+        elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
+            os.environ["REPLICATE_API_KEY"] = temporary_key
+    return
+invalid_auth("command-nightly")
+
 # # Test 3: Rate Limit Errors
 # def test_model(model):
 #     try:
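Aside: the save/override/restore dance around the environment variables above can also be expressed with pytest's monkeypatch fixture, which restores the original value automatically even when the test fails. A sketch of the same idea (not part of this commit; the error import path is assumed, as elsewhere):

import pytest
from litellm import completion
from openai.error import AuthenticationError, OpenAIError  # assumed import path

KEY_VARS = {"gpt-3.5-turbo": "OPENAI_API_KEY", "command-nightly": "COHERE_API_KEY"}

@pytest.mark.parametrize("model", ["command-nightly"])
def test_invalid_auth_monkeypatch(model, monkeypatch):
    monkeypatch.setenv(KEY_VARS[model], "bad-key")  # reverted automatically after the test
    with pytest.raises((AuthenticationError, OpenAIError)):
        completion(model=model, messages=[{"role": "user", "content": "Hello, how are you?"}])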
litellm/utils.py (134 lines changed)
@@ -25,41 +25,44 @@ def print_verbose(print_statement):

 ####### LOGGING ###################
 #Logging function -> log the exact model details + what's being sent | Non-Blocking
-def logging(model, input, azure=False, additional_args={}, logger_fn=None, exception=None):
+def logging(model=None, input=None, azure=False, additional_args={}, logger_fn=None, exception=None):
     try:
         model_call_details = {}
-        model_call_details["model"] = model
-        model_call_details["azure"] = azure
-        # log exception details
+        if model:
+            model_call_details["model"] = model
+        if azure:
+            model_call_details["azure"] = azure
         if exception:
             model_call_details["original_exception"] = exception

         if litellm.telemetry:
             safe_crash_reporting(model=model, exception=exception, azure=azure) # log usage-crash details. Do not log any user details. If you want to turn this off, set `litellm.telemetry=False`.

-        model_call_details["input"] = input
+        if input:
+            model_call_details["input"] = input
         # log additional call details -> api key, etc.
-        if azure == True or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_embedding_models:
-            model_call_details["api_type"] = openai.api_type
-            model_call_details["api_base"] = openai.api_base
-            model_call_details["api_version"] = openai.api_version
-            model_call_details["api_key"] = openai.api_key
-        elif "replicate" in model:
-            model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
-        elif model in litellm.anthropic_models:
-            model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
-        elif model in litellm.cohere_models:
-            model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
-        model_call_details["additional_args"] = additional_args
+        if model:
+            if azure == True or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_embedding_models:
+                model_call_details["api_type"] = openai.api_type
+                model_call_details["api_base"] = openai.api_base
+                model_call_details["api_version"] = openai.api_version
+                model_call_details["api_key"] = openai.api_key
+            elif "replicate" in model:
+                model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
+            elif model in litellm.anthropic_models:
+                model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
+            elif model in litellm.cohere_models:
+                model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
+        model_call_details["additional_args"] = additional_args
         ## User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs
-        print_verbose(f"Basic model call details: {model_call_details}")
+        print_verbose(f"Logging Details: logger_fn - {logger_fn} | callable(logger_fn) - {callable(logger_fn)}")
         if logger_fn and callable(logger_fn):
             try:
                 logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object
-            except:
-                print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
-    except:
-        traceback.print_exc()
+            except Exception as e:
+                print(f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
+    except Exception as e:
+        print(f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}")
         pass

 ####### CLIENT ###################
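Since logging() now tolerates a missing model or input and simply forwards a plain dict, a user-supplied logger only has to accept that dict. A small sketch of the expected shape, mirroring the logging_fn used in the tests above (which keys are present depends on the call site):

def my_logger(model_call_dict):
    # keys seen in this diff include "model", "input", "azure", "api_key",
    # "additional_args", and "original_exception" (the last only on failures)
    model = model_call_dict.get("model", "<unknown>")
    if "original_exception" in model_call_dict:
        print(f"call to {model} failed: {model_call_dict['original_exception']}")
    else:
        print(f"calling {model} with: {model_call_dict.get('input')}")

# passed through completion(...); the client wrapper below also stores it as user_logger_fn
# response = completion(model="command-nightly", messages=messages, logger_fn=my_logger)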
@@ -67,7 +70,7 @@ def logging(model, input, azure=False, additional_args={}, logger_fn=None, excep
 def client(original_function):
     def function_setup(*args, **kwargs): #just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
         try:
-            global callback_list, add_breadcrumb
+            global callback_list, add_breadcrumb, user_logger_fn
             if (len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0) and len(callback_list) == 0:
                 callback_list = list(set(litellm.success_callback + litellm.failure_callback))
                 set_callbacks(callback_list=callback_list,)
@@ -77,13 +80,15 @@ def client(original_function):
                     message=f"Positional Args: {args}, Keyword Args: {kwargs}",
                     level="info",
                 )
+            if "logger_fn" in kwargs:
+                user_logger_fn = kwargs["logger_fn"]
         except: # DO NOT BLOCK running the function because of this
             print_verbose(f"[Non-Blocking] {traceback.format_exc()}")
             pass

     def wrapper(*args, **kwargs):
         try:
-            function_setup(args, kwargs)
+            function_setup(*args, **kwargs)
             ## MODEL CALL
             start_time = datetime.datetime.now()
             result = original_function(*args, **kwargs)
@@ -100,6 +105,51 @@ def client(original_function):
     return wrapper

 ####### HELPER FUNCTIONS ################
+def get_optional_params(
+    # 12 optional params
+    functions = [],
+    function_call = "",
+    temperature = 1,
+    top_p = 1,
+    n = 1,
+    stream = False,
+    stop = None,
+    max_tokens = float('inf'),
+    presence_penalty = 0,
+    frequency_penalty = 0,
+    logit_bias = {},
+    user = "",
+    deployment_id = None
+):
+    optional_params = {}
+    if functions != []:
+        optional_params["functions"] = functions
+    if function_call != "":
+        optional_params["function_call"] = function_call
+    if temperature != 1:
+        optional_params["temperature"] = temperature
+    if top_p != 1:
+        optional_params["top_p"] = top_p
+    if n != 1:
+        optional_params["n"] = n
+    if stream:
+        optional_params["stream"] = stream
+    if stop != None:
+        optional_params["stop"] = stop
+    if max_tokens != float('inf'):
+        optional_params["max_tokens"] = max_tokens
+    if presence_penalty != 0:
+        optional_params["presence_penalty"] = presence_penalty
+    if frequency_penalty != 0:
+        optional_params["frequency_penalty"] = frequency_penalty
+    if logit_bias != {}:
+        optional_params["logit_bias"] = logit_bias
+    if user != "":
+        optional_params["user"] = user
+    if deployment_id != None:
+        optional_params["deployment_id"] = user
+    return optional_params
+
 def set_callbacks(callback_list):
     global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger
     try:
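For reference, get_optional_params (moved here from main.py and re-exported via litellm/__init__.py in this commit) only returns arguments that differ from their OpenAI defaults, so provider calls can splat the result without sending a full parameter set. A quick sketch of the intended behaviour with my own example values (note that, as committed, the deployment_id branch assigns the user value, which looks like a typo in the source):

from litellm import get_optional_params

optional_params = get_optional_params(temperature=0.2, max_tokens=256, stream=False)
# -> {"temperature": 0.2, "max_tokens": 256}; defaults (top_p=1, n=1, stream=False, ...) are dropped,
# and the dict would then be forwarded to the provider call, e.g. **optional_params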
@@ -150,8 +200,8 @@ def set_callbacks(callback_list):
 def handle_failure(exception, traceback_exception, args, kwargs):
     global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
     try:
-        print_verbose(f"handle_failure args: {args}")
-        print_verbose(f"handle_failure kwargs: {kwargs}")
+        # print_verbose(f"handle_failure args: {args}")
+        # print_verbose(f"handle_failure kwargs: {kwargs}")

         success_handler = additional_details.pop("success_handler", None)
         failure_handler = additional_details.pop("failure_handler", None)
@@ -159,7 +209,8 @@ def handle_failure(exception, traceback_exception, args, kwargs):
         additional_details["Event_Name"] = additional_details.pop("failed_event_name", "litellm.failed_query")
         print_verbose(f"self.failure_callback: {litellm.failure_callback}")

-        print_verbose(f"additional_details: {additional_details}")
+        # print_verbose(f"additional_details: {additional_details}")
         for callback in litellm.failure_callback:
             try:
                 if callback == "slack":
@@ -206,7 +257,9 @@ def handle_failure(exception, traceback_exception, args, kwargs):
             }
             failure_handler(call_details)
             pass
-    except:
+    except Exception as e:
+        ## LOGGING
+        logging(logger_fn=user_logger_fn, exception=e)
         pass

 def handle_success(args, kwargs, result, start_time, end_time):
@@ -245,12 +298,16 @@ def handle_success(args, kwargs, result, start_time, end_time):
         if success_handler and callable(success_handler):
             success_handler(args, kwargs)
         pass
-    except:
+    except Exception as e:
+        ## LOGGING
+        logging(logger_fn=user_logger_fn, exception=e)
         print_verbose(f"Success Callback Error - {traceback.format_exc()}")
         pass


 def exception_type(model, original_exception):
+    global user_logger_fn
+    exception_mapping_worked = False
     try:
         if isinstance(original_exception, OpenAIError):
             # Handle the OpenAIError
@@ -265,32 +322,46 @@ def exception_type(model, original_exception):
             if "status_code" in original_exception:
                 print_verbose(f"status_code: {original_exception.status_code}")
                 if original_exception.status_code == 401:
+                    exception_mapping_worked = True
                     raise AuthenticationError(f"AnthropicException - {original_exception.message}")
                 elif original_exception.status_code == 400:
+                    exception_mapping_worked = True
                     raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}")
                 elif original_exception.status_code == 429:
+                    exception_mapping_worked = True
                     raise RateLimitError(f"AnthropicException - {original_exception.message}")
         elif "replicate" in model:
             if "Incorrect authentication token" in error_str:
+                exception_mapping_worked = True
                 raise AuthenticationError(f"ReplicateException - {error_str}")
             elif exception_type == "ModelError":
+                exception_mapping_worked = True
                 raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}")
             elif "Request was throttled" in error_str:
+                exception_mapping_worked = True
                 raise RateLimitError(f"ReplicateException - {error_str}")
             elif exception_type == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side
                 raise ServiceUnavailableError(f"ReplicateException - {error_str}")
         elif model == "command-nightly": #Cohere
             if "invalid api token" in error_str or "No API key provided." in error_str:
+                exception_mapping_worked = True
                 raise AuthenticationError(f"CohereException - {error_str}")
             elif "too many tokens" in error_str:
+                exception_mapping_worked = True
                 raise InvalidRequestError(f"CohereException - {error_str}", f"{model}")
             elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min)
+                exception_mapping_worked = True
                 raise RateLimitError(f"CohereException - {original_exception.message}")
             raise original_exception # base case - return the original exception
         else:
             raise original_exception
-    except:
-        raise original_exception
+    except Exception as e:
+        ## LOGGING
+        logging(logger_fn=user_logger_fn, additional_args={"original_exception": original_exception}, exception=e)
+        if exception_mapping_worked:
+            raise e
+        else: # don't let an error with mapping interrupt the user from receiving an error from the llm api calls
+            raise original_exception

 def safe_crash_reporting(model=None, exception=None, azure=None):
     data = {
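The exception_mapping_worked flag is the core of this fix: if a provider error was successfully translated into one of the OpenAI-style classes, that mapped error is re-raised; if the mapping code itself blew up, the caller still receives the provider's original exception. Distilled to its control flow (a sketch, not the literal function; translate() is a hypothetical stand-in for the per-provider checks above):

def map_exception(model, original_exception):
    exception_mapping_worked = False
    try:
        mapped = translate(model, original_exception)  # hypothetical: the provider-specific checks in the diff
        if mapped is not None:
            exception_mapping_worked = True
            raise mapped
        raise original_exception  # base case: nothing matched
    except Exception as e:
        if exception_mapping_worked:
            raise e  # surface the mapped, OpenAI-style error
        raise original_exception  # never hide the provider error behind a mapping bug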
@@ -323,7 +394,6 @@ def litellm_telemetry(data):
         'uuid': uuid_value,
         'data': data
     }
-    print_verbose(f"payload: {payload}")
     try:
         # Make the POST request to localhost:3000
         response = requests.post('https://litellm.berri.ai/logging', json=payload)
setup.py (new file, 24 lines)
@@ -0,0 +1,24 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='litellm',
+    version='0.1.231',
+    description='Library to easily interface with LLM API providers',
+    author='BerriAI',
+    packages=[
+        'litellm'
+    ],
+    package_data={
+        "litellm": ["integrations/*"], # Specify the directory path relative to your package
+    },
+    install_requires=[
+        'openai',
+        'cohere',
+        'pytest',
+        'anthropic',
+        'replicate',
+        'python-dotenv',
+        'openai[datalib]',
+        'tenacity'
+    ],
+)