refactor test exceptions

This commit is contained in:
ishaan-jaff 2023-08-01 15:01:23 -07:00
parent 809e50d802
commit c99688df19

View file

@ -1,25 +1,82 @@
from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, OpenAIError
import os
import sys
import traceback
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
from concurrent.futures import ThreadPoolExecutor
import pytest
#### What this tests ####
# This tests exception mapping -> trigger an exception from an LLM provider -> assert that the raised exception is of the expected type
# 5 providers -> OpenAI, Azure, Anthropic, Cohere, Replicate
# 3 main types of exceptions -> Rate Limit Errors, Context Window Errors, Auth errors (incorrect/rotated key, etc.)
# Approach: Run each model through the test -> assert that the correct error (always the same one) is triggered
# Models exercised by the parametrized tests below: OpenAI ("gpt-3.5-turbo"),
# Azure ("chatgpt-test", called with azure=True), Anthropic ("claude-instant-1")
# and Cohere ("command-nightly").
models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly"]
# import os
# import sys
# import traceback
# sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
# import litellm
# from litellm import embedding, completion
# from concurrent.futures import ThreadPoolExecutor
# models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly", "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"]
# Test 1: Context Window Errors
@pytest.mark.parametrize("model", models)
def test_context_window(model):
    """Check that an oversized prompt raises a mapped OpenAI-style error.

    Sends a prompt built by repeating one sentence 100000 times to ``model``
    through ``completion``. The test passes when ``InvalidRequestError`` is
    raised, or any other ``OpenAIError`` (tolerated so that unrelated provider
    errors, e.g. an overloaded server, do not fail the run). It fails when a
    different exception type escapes, or when the call returns without
    raising at all.
    """
    sample_text = "how does a court case get to the Supreme Court?" * 100000
    messages = [{"content": sample_text, "role": "user"}]
    # "chatgpt-test" is the one model that is called with azure=True.
    azure = model == "chatgpt-test"
    print(f"model: {model}")
    try:
        completion(model=model, messages=messages, azure=azure)
    except InvalidRequestError:
        print("InvalidRequestError")
    except OpenAIError:
        print("OpenAIError")
    except Exception as e:
        print("Uncaught Error in test_context_window")
        print(f"Error Type: {type(e).__name__}")
        print(f"Uncaught Exception - {e}")
        pytest.fail(f"Error occurred: {e}")
    else:
        # A successful call means no exception was mapped at all; without this
        # branch the test would pass without having checked anything.
        pytest.fail(f"No exception raised for oversized prompt (model: {model})")
# Test 2: InvalidAuth Errors
def logger_fn(model_call_object: dict):
    """Print the details of a model call.

    Handed to ``completion`` as its ``logger_fn`` argument by the auth test.
    """
    message = f"model call details: {model_call_object}"
    print(message)
@pytest.mark.parametrize("model", models)
def test_invalid_auth(model):  # set the model key to an invalid key, depending on the model
    """Check that a call made with an invalid API key raises a mapped error.

    The environment variable(s) holding the key for ``model`` are overwritten
    with ``"bad-key"`` for the duration of the call and restored afterwards,
    so the invalid value does not leak into tests that run later in the same
    process. The test passes when ``AuthenticationError`` is raised, or any
    other ``OpenAIError`` (tolerated for unrelated provider errors, e.g. an
    overloaded server). It fails when a different exception type escapes, or
    when the call succeeds despite the invalid key.
    """
    messages = [{"content": "Hello, how are you?", "role": "user"}]
    # Environment variables to invalidate for each model.
    key_names_by_model = {
        "gpt-3.5-turbo": ("OPENAI_API_KEY",),
        "chatgpt-test": ("AZURE_API_KEY",),
        "claude-instant-1": ("ANTHROPIC_API_KEY",),
        "command-nightly": ("COHERE_API_KEY",),
        "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1": (
            "REPLICATE_API_KEY",
            "REPLICATE_API_TOKEN",
        ),
    }
    key_names = key_names_by_model.get(model, ())
    # "chatgpt-test" is the one model that is called with azure=True.
    azure = model == "chatgpt-test"
    # Remember the current values (None when unset) so they can be restored.
    saved_env = {name: os.environ.get(name) for name in key_names}
    try:
        for name in key_names:
            os.environ[name] = "bad-key"
        print(f"model: {model}")
        response = completion(model=model, messages=messages, azure=azure, logger_fn=logger_fn)
        print(f"response: {response}")
    except AuthenticationError:
        pass  # expected outcome
    except OpenAIError:  # is at least an openai error -> in case of random model errors - e.g. overloaded server
        pass
    except Exception as e:
        print(f"Uncaught Exception - {e}")
        pytest.fail(f"Error occurred: {e}")
    else:
        # A successful call means the bad key was not rejected; without this
        # branch the test would pass without having checked anything.
        pytest.fail(f"No exception raised with an invalid API key (model: {model})")
    finally:
        for name, value in saved_env.items():
            if value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = value
# # Test 3: Rate Limit Errors
# def test_model(model): # def test_model(model):
# try: # try:
# sample_text = "how does a court case get to the Supreme Court?" * 50000 # sample_text = "how does a court case get to the Supreme Court?" * 50000
@ -59,72 +116,4 @@
# accuracy_score = counts[True]/(counts[True] + counts[False]) # accuracy_score = counts[True]/(counts[True] + counts[False])
# print(f"accuracy_score: {accuracy_score}") # print(f"accuracy_score: {accuracy_score}")
# # Test 2: Context Window Errors
# print("Testing Context Window Errors")
# def test_model(model): # pass extremely long input
# sample_text = "how does a court case get to the Supreme Court?" * 100000
# messages = [{ "content": sample_text,"role": "user"}]
# try:
# azure = False
# if model == "chatgpt-test":
# azure = True
# print(f"model: {model}")
# response = completion(model=model, messages=messages, azure=azure)
# except InvalidRequestError:
# return True
# except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
# return True
# except Exception as e:
# print(f"Error Type: {type(e).__name__}")
# print(f"Uncaught Exception - {e}")
# pass
# return False
# ## TEST SCORE
# true_val = 0
# for model in models:
# if test_model(model=model) == True:
# true_val += 1
# accuracy_score = true_val/len(models)
# print(f"CTX WINDOW accuracy_score: {accuracy_score}")
# # Test 3: InvalidAuth Errors
# def logger_fn(model_call_object: dict):
# print(f"model call details: {model_call_object}")
# def test_model(model): # set the model key to an invalid key, depending on the model
# messages = [{ "content": "Hello, how are you?","role": "user"}]
# try:
# azure = False
# if model == "gpt-3.5-turbo":
# os.environ["OPENAI_API_KEY"] = "bad-key"
# elif model == "chatgpt-test":
# os.environ["AZURE_API_KEY"] = "bad-key"
# azure = True
# elif model == "claude-instant-1":
# os.environ["ANTHROPIC_API_KEY"] = "bad-key"
# elif model == "command-nightly":
# os.environ["COHERE_API_KEY"] = "bad-key"
# elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
# os.environ["REPLICATE_API_KEY"] = "bad-key"
# os.environ["REPLICATE_API_TOKEN"] = "bad-key"
# print(f"model: {model}")
# response = completion(model=model, messages=messages, azure=azure, logger_fn=logger_fn)
# print(f"response: {response}")
# except AuthenticationError as e:
# return True
# except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
# return True
# except Exception as e:
# print(f"Uncaught Exception - {e}")
# pass
# return False
# ## TEST SCORE
# true_val = 0
# for model in models:
# if test_model(model=model) == True:
# true_val += 1
# accuracy_score = true_val/len(models)
# print(f"INVALID AUTH accuracy_score: {accuracy_score}")