temp comment out test_exceptions.py

This commit is contained in:
Ishaan Jaff 2023-08-01 13:28:22 -07:00 committed by Krrish Dholakia
parent c318ae13c7
commit 4c67fe6d6e

View file

@ -1,12 +1,12 @@
#### What this tests #### # #### What this tests ####
# This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type # # This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type
# 5 providers -> OpenAI, Azure, Anthropic, Cohere, Replicate # # 5 providers -> OpenAI, Azure, Anthropic, Cohere, Replicate
# 3 main types of exceptions -> - Rate Limit Errors, Context Window Errors, Auth errors (incorrect/rotated key, etc.) # # 3 main types of exceptions -> - Rate Limit Errors, Context Window Errors, Auth errors (incorrect/rotated key, etc.)
# Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered # # Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered
from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, OpenAIError from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, OpenAIError
import os import os
@ -19,112 +19,112 @@ from concurrent.futures import ThreadPoolExecutor
# Model names exercised by every test below, one per provider
# (OpenAI, Azure, Anthropic, Cohere, Replicate — see the header comment).
models = [
    "gpt-3.5-turbo",
    "chatgpt-test",
    "claude-instant-1",
    "command-nightly",
    "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1",
]
# Test 1: Rate Limit Errors # # Test 1: Rate Limit Errors
def test_model(model):
    """Send an oversized prompt to ``model`` and classify the resulting error.

    The prompt is the sample question repeated 50000 times, sent as a single
    user message through ``completion``.

    NOTE: this name is rebound by the later ``test_model`` definitions in
    this file (Test 2 and Test 3); this version is only in effect until then.

    Args:
        model: Model name passed to ``completion``. ``"chatgpt-test"`` is
            sent with ``azure=True``; every other name with ``azure=False``.

    Returns:
        True if the call raised ``RateLimitError`` or ``OpenAIError``;
        False if it completed normally or raised any other exception.
    """
    sample_text = "how does a court case get to the Supreme Court?" * 50000
    messages = [{"content": sample_text, "role": "user"}]
    azure = model == "chatgpt-test"
    try:
        print(f"model: {model}")
        # The response body is irrelevant here; only the raised error type matters.
        completion(model=model, messages=messages, azure=azure)
    except RateLimitError:
        return True
    except OpenAIError:  # is at least an openai error -> in case of random model errors - e.g. overloaded server
        return True
    except Exception as e:
        # Anything else counts as a miss, but is reported so it can be investigated.
        print(f"Uncaught Exception {model}: {type(e).__name__} - {e}")
    return False
# Repeat each model 250 times
extended_models = [model for model in models for _ in range(250)]


def worker(model):
    """Thread-pool entry point: run ``test_model`` for one model and return its result."""
    return test_model(model)


# Create a dictionary to store the results (keyed by test_model's boolean outcome)
counts = {True: 0, False: 0}

# Use Thread Pool Executor
with ThreadPoolExecutor(max_workers=500) as executor:
    # Use map to start the operation in thread pool
    results = executor.map(worker, extended_models)

    # Iterate over results and count True/False
    for result in results:
        counts[result] += 1

# Fraction of calls that were classified as an expected error
accuracy_score = counts[True] / (counts[True] + counts[False])
print(f"accuracy_score: {accuracy_score}")
# Test 2: Context Window Errors # # Test 2: Context Window Errors
print("Testing Context Window Errors")


def test_model(model):  # pass extremely long input
    """Send an extremely long prompt to ``model`` and classify the resulting error.

    The prompt is the sample question repeated 100000 times, sent as a single
    user message through ``completion``.

    NOTE: this rebinds the earlier ``test_model`` (Test 1) and is itself
    rebound by the Test 3 definition further down the file.

    Args:
        model: Model name passed to ``completion``. ``"chatgpt-test"`` is
            sent with ``azure=True``; every other name with ``azure=False``.

    Returns:
        True if the call raised ``InvalidRequestError`` or ``OpenAIError``;
        False if it completed normally or raised any other exception.
    """
    sample_text = "how does a court case get to the Supreme Court?" * 100000
    messages = [{"content": sample_text, "role": "user"}]
    azure = model == "chatgpt-test"
    try:
        print(f"model: {model}")
        # The response body is irrelevant here; only the raised error type matters.
        completion(model=model, messages=messages, azure=azure)
    except InvalidRequestError:
        return True
    except OpenAIError:  # is at least an openai error -> in case of random model errors - e.g. overloaded server
        return True
    except Exception as e:
        # Anything else counts as a miss, but is reported so it can be investigated.
        print(f"Error Type: {type(e).__name__}")
        print(f"Uncaught Exception - {e}")
    return False
## TEST SCORE
# Count the models for which test_model reported an expected error type.
true_val = 0
for model in models:
    if test_model(model=model):
        true_val += 1
# Fraction of models that produced an expected error
accuracy_score = true_val / len(models)
print(f"CTX WINDOW accuracy_score: {accuracy_score}")
# Test 3: InvalidAuth Errors # # Test 3: InvalidAuth Errors
def logger_fn(model_call_object: dict):
    """Print the details of a model call.

    Passed as the ``logger_fn`` argument to ``completion`` in the invalid-auth
    test below.

    Args:
        model_call_object: Dictionary describing the call; printed as-is.
    """
    details_line = f"model call details: {model_call_object}"
    print(details_line)
def test_model(model): # set the model key to an invalid key, depending on the model # def test_model(model): # set the model key to an invalid key, depending on the model
messages = [{ "content": "Hello, how are you?","role": "user"}] # messages = [{ "content": "Hello, how are you?","role": "user"}]
try: # try:
azure = False # azure = False
if model == "gpt-3.5-turbo": # if model == "gpt-3.5-turbo":
os.environ["OPENAI_API_KEY"] = "bad-key" # os.environ["OPENAI_API_KEY"] = "bad-key"
elif model == "chatgpt-test": # elif model == "chatgpt-test":
os.environ["AZURE_API_KEY"] = "bad-key" # os.environ["AZURE_API_KEY"] = "bad-key"
azure = True # azure = True
elif model == "claude-instant-1": # elif model == "claude-instant-1":
os.environ["ANTHROPIC_API_KEY"] = "bad-key" # os.environ["ANTHROPIC_API_KEY"] = "bad-key"
elif model == "command-nightly": # elif model == "command-nightly":
os.environ["COHERE_API_KEY"] = "bad-key" # os.environ["COHERE_API_KEY"] = "bad-key"
elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1": # elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
os.environ["REPLICATE_API_KEY"] = "bad-key" # os.environ["REPLICATE_API_KEY"] = "bad-key"
os.environ["REPLICATE_API_TOKEN"] = "bad-key" # os.environ["REPLICATE_API_TOKEN"] = "bad-key"
print(f"model: {model}") # print(f"model: {model}")
response = completion(model=model, messages=messages, azure=azure, logger_fn=logger_fn) # response = completion(model=model, messages=messages, azure=azure, logger_fn=logger_fn)
print(f"response: {response}") # print(f"response: {response}")
except AuthenticationError as e: # except AuthenticationError as e:
return True # return True
except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server # except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
return True # return True
except Exception as e: # except Exception as e:
print(f"Uncaught Exception - {e}") # print(f"Uncaught Exception - {e}")
pass # pass
return False # return False
## TEST SCORE
# Count the models for which test_model reported an expected error type.
true_val = 0
for model in models:
    if test_model(model=model):
        true_val += 1
# Fraction of models that produced an expected error
accuracy_score = true_val / len(models)
print(f"INVALID AUTH accuracy_score: {accuracy_score}")