refactor: add black formatting

Krrish Dholakia 2023-12-25 14:10:38 +05:30
parent b87d630b0a
commit 4905929de3
156 changed files with 19723 additions and 10869 deletions
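The diff below reflects Black's default rules: double-quoted strings, an 88-character line length, and trailing commas wherever a call or literal is exploded across lines. As a minimal sketch of the transformation, assuming the `black` package is installed (the snippet is illustrative, not part of the commit), the same rewrite can be reproduced through Black's library entry points:

# Reproduce the quote normalization seen in the first hunk below using
# black.format_str with Black's default Mode (88-char lines, double quotes).
import black

before = "models = ['gpt-3.5-turbo', 'claude-2']\n"
after = black.format_str(before, mode=black.Mode())
print(after)  # models = ["gpt-3.5-turbo", "claude-2"]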

View file

@@ -9,33 +9,37 @@ import os
# Define the list of models to benchmark
# select any LLM listed here: https://docs.litellm.ai/docs/providers
models = ['gpt-3.5-turbo', 'claude-2']
models = ["gpt-3.5-turbo", "claude-2"]
# Enter LLM API keys
# https://docs.litellm.ai/docs/providers
os.environ['OPENAI_API_KEY'] = ""
os.environ['ANTHROPIC_API_KEY'] = ""
os.environ["OPENAI_API_KEY"] = ""
os.environ["ANTHROPIC_API_KEY"] = ""
# List of questions to benchmark (replace with your questions)
questions = [
"When will BerriAI IPO?",
"When will LiteLLM hit $100M ARR?"
]
questions = ["When will BerriAI IPO?", "When will LiteLLM hit $100M ARR?"]
# Enter your system prompt here
# Enter your system prompt here
system_prompt = """
You are LiteLLMs helpful assistant
"""
@click.command()
@click.option('--system-prompt', default="You are a helpful assistant that can answer questions.", help="System prompt for the conversation.")
@click.option(
"--system-prompt",
default="You are a helpful assistant that can answer questions.",
help="System prompt for the conversation.",
)
def main(system_prompt):
for question in questions:
data = [] # Data for the current question
with tqdm(total=len(models)) as pbar:
for model in models:
colored_description = colored(f"Running question: {question} for model: {model}", 'green')
colored_description = colored(
f"Running question: {question} for model: {model}", "green"
)
pbar.set_description(colored_description)
start_time = time.time()
@@ -44,35 +48,43 @@ def main(system_prompt):
max_tokens=500,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": question}
{"role": "user", "content": question},
],
)
end = time.time()
total_time = end - start_time
cost = completion_cost(completion_response=response)
raw_response = response['choices'][0]['message']['content']
raw_response = response["choices"][0]["message"]["content"]
data.append({
'Model': colored(model, 'light_blue'),
'Response': raw_response, # Colorize the response
'ResponseTime': colored(f"{total_time:.2f} seconds", "red"),
'Cost': colored(f"${cost:.6f}", 'green'), # Colorize the cost
})
data.append(
{
"Model": colored(model, "light_blue"),
"Response": raw_response, # Colorize the response
"ResponseTime": colored(f"{total_time:.2f} seconds", "red"),
"Cost": colored(f"${cost:.6f}", "green"), # Colorize the cost
}
)
pbar.update(1)
# Separate headers from the data
headers = ['Model', 'Response', 'Response Time (seconds)', 'Cost ($)']
headers = ["Model", "Response", "Response Time (seconds)", "Cost ($)"]
colwidths = [15, 80, 15, 10]
# Create a nicely formatted table for the current question
table = tabulate([list(d.values()) for d in data], headers, tablefmt="grid", maxcolwidths=colwidths)
table = tabulate(
[list(d.values()) for d in data],
headers,
tablefmt="grid",
maxcolwidths=colwidths,
)
# Print the table for the current question
colored_question = colored(question, 'green')
colored_question = colored(question, "green")
click.echo(f"\nBenchmark Results for '{colored_question}':")
click.echo(table) # Display the formatted table
if __name__ == '__main__':
if __name__ == "__main__":
main()
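This file is a click CLI, so it would normally be run from the shell; it can also be exercised programmatically through click's testing utilities. A minimal sketch, assuming the script is importable as `benchmark` (the module name is not shown in this diff and is only an assumption):

# Invoke the click command defined above without a shell, using click's
# CliRunner; the module name `benchmark` is hypothetical.
from click.testing import CliRunner

from benchmark import main  # hypothetical module name

runner = CliRunner()
result = runner.invoke(main, ["--system-prompt", "You are a terse assistant."])
print(result.output)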

View file

@@ -1,25 +1,22 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import litellm
from litellm import embedding, completion, completion_cost
from autoevals.llm import *
###################
import litellm
# litellm completion call
question = "which country has the highest population"
response = litellm.completion(
model = "gpt-3.5-turbo",
messages = [
{
"role": "user",
"content": question
}
],
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": question}],
)
print(response)
# use the auto eval Factuality() evaluator
@@ -27,9 +24,11 @@ print(response)
print("calling evaluator")
evaluator = Factuality()
result = evaluator(
output=response.choices[0]["message"]["content"], # response from litellm.completion()
expected="India", # expected output
input=question # question passed to litellm.completion
output=response.choices[0]["message"][
"content"
], # response from litellm.completion()
expected="India", # expected output
input=question, # question passed to litellm.completion
)
print(result)
print(result)

View file

@@ -4,9 +4,10 @@ from flask_cors import CORS
import traceback
import litellm
from util import handle_error
from litellm import completion
import os, dotenv, time
from litellm import completion
import os, dotenv, time
import json
dotenv.load_dotenv()
# TODO: set your keys in .env or here:
@@ -19,57 +20,72 @@ verbose = True
# litellm.caching_with_models = True # CACHING: caching_with_models Keys in the cache are messages + model. - to learn more: https://docs.litellm.ai/docs/caching/
######### PROMPT LOGGING ##########
os.environ["PROMPTLAYER_API_KEY"] = "" # set your promptlayer key here - https://promptlayer.com/
os.environ[
"PROMPTLAYER_API_KEY"
] = "" # set your promptlayer key here - https://promptlayer.com/
# set callbacks
litellm.success_callback = ["promptlayer"]
############ HELPER FUNCTIONS ###################################
def print_verbose(print_statement):
if verbose:
print(print_statement)
app = Flask(__name__)
CORS(app)
@app.route('/')
@app.route("/")
def index():
return 'received!', 200
return "received!", 200
def data_generator(response):
for chunk in response:
yield f"data: {json.dumps(chunk)}\n\n"
@app.route('/chat/completions', methods=["POST"])
@app.route("/chat/completions", methods=["POST"])
def api_completion():
data = request.json
start_time = time.time()
if data.get('stream') == "True":
data['stream'] = True # convert to boolean
start_time = time.time()
if data.get("stream") == "True":
data["stream"] = True # convert to boolean
try:
if "prompt" not in data:
raise ValueError("data needs to have prompt")
data["model"] = "togethercomputer/CodeLlama-34b-Instruct" # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
data[
"model"
] = "togethercomputer/CodeLlama-34b-Instruct" # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
# COMPLETION CALL
system_prompt = "Only respond to questions about code. Say 'I don't know' to anything outside of that."
messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": data.pop("prompt")}]
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": data.pop("prompt")},
]
data["messages"] = messages
print(f"data: {data}")
response = completion(**data)
## LOG SUCCESS
end_time = time.time()
if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
return Response(data_generator(response), mimetype='text/event-stream')
end_time = time.time()
if (
"stream" in data and data["stream"] == True
): # use generate_responses to stream responses
return Response(data_generator(response), mimetype="text/event-stream")
except Exception as e:
# call handle_error function
print_verbose(f"Got Error api_completion(): {traceback.format_exc()}")
## LOG FAILURE
end_time = time.time()
end_time = time.time()
traceback_exception = traceback.format_exc()
return handle_error(data=data)
return response
@app.route('/get_models', methods=["POST"])
@app.route("/get_models", methods=["POST"])
def get_models():
try:
return litellm.model_list
@@ -78,7 +94,8 @@ def get_models():
response = {"error": str(e)}
return response, 200
if __name__ == "__main__":
from waitress import serve
serve(app, host="0.0.0.0", port=4000, threads=500)
if __name__ == "__main__":
from waitress import serve
serve(app, host="0.0.0.0", port=4000, threads=500)
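A minimal client sketch for the proxy above, assuming it is running locally on the port configured in the `serve` call and that valid provider keys are set in `.env`; the prompt text is illustrative:

# Call the /chat/completions route defined above; the handler only requires a
# "prompt" key, and passing "stream": "True" opts into server-sent events.
import requests

resp = requests.post(
    "http://0.0.0.0:4000/chat/completions",
    json={"prompt": "Write a function that reverses a string."},
)
print(resp.status_code, resp.text)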

View file

@@ -3,27 +3,28 @@ from urllib.parse import urlparse, parse_qs
def get_next_url(response):
"""
"""
Function to get 'next' url from Link header
:param response: response from requests
:return: next url or None
"""
if 'link' not in response.headers:
return None
headers = response.headers
if "link" not in response.headers:
return None
headers = response.headers
next_url = headers['Link']
print(next_url)
start_index = next_url.find("<")
end_index = next_url.find(">")
next_url = headers["Link"]
print(next_url)
start_index = next_url.find("<")
end_index = next_url.find(">")
return next_url[1:end_index]
return next_url[1:end_index]
def get_models(url):
"""
Function to retrieve all models from paginated endpoint
:param url: base url to make GET request
:return: list of all models
Function to retrieve all models from paginated endpoint
:param url: base url to make GET request
:return: list of all models
"""
models = []
while url:
@@ -36,19 +37,21 @@ def get_models(url):
models.extend(payload)
return models
def get_cleaned_models(models):
"""
Function to clean retrieved models
:param models: list of retrieved models
:return: list of cleaned models
Function to clean retrieved models
:param models: list of retrieved models
:return: list of cleaned models
"""
cleaned_models = []
for model in models:
cleaned_models.append(model["id"])
return cleaned_models
# Get text-generation models
url = 'https://huggingface.co/api/models?filter=text-generation-inference'
url = "https://huggingface.co/api/models?filter=text-generation-inference"
text_generation_models = get_models(url)
cleaned_text_generation_models = get_cleaned_models(text_generation_models)
@@ -56,7 +59,7 @@ print(cleaned_text_generation_models)
# Get conversational models
url = 'https://huggingface.co/api/models?filter=conversational'
url = "https://huggingface.co/api/models?filter=conversational"
conversational_models = get_models(url)
cleaned_conversational_models = get_cleaned_models(conversational_models)
@@ -65,19 +68,23 @@ print(cleaned_conversational_models)
def write_to_txt(cleaned_models, filename):
"""
Function to write the contents of a list to a text file
:param cleaned_models: list of cleaned models
:param filename: name of the text file
Function to write the contents of a list to a text file
:param cleaned_models: list of cleaned models
:param filename: name of the text file
"""
with open(filename, 'w') as f:
with open(filename, "w") as f:
for item in cleaned_models:
f.write("%s\n" % item)
# Write contents of cleaned_text_generation_models to text_generation_models.txt
write_to_txt(cleaned_text_generation_models, 'huggingface_llms_metadata/hf_text_generation_models.txt')
write_to_txt(
cleaned_text_generation_models,
"huggingface_llms_metadata/hf_text_generation_models.txt",
)
# Write contents of cleaned_conversational_models to conversational_models.txt
write_to_txt(cleaned_conversational_models, 'huggingface_llms_metadata/hf_conversational_models.txt')
write_to_txt(
cleaned_conversational_models,
"huggingface_llms_metadata/hf_conversational_models.txt",
)

View file

@@ -1,4 +1,3 @@
import openai
api_base = f"http://0.0.0.0:8000"
@@ -8,29 +7,29 @@ openai.api_key = "temp-key"
print(openai.api_base)
print(f'LiteLLM: response from proxy with streaming')
print(f"LiteLLM: response from proxy with streaming")
response = openai.ChatCompletion.create(
model="ollama/llama2",
messages = [
model="ollama/llama2",
messages=[
{
"role": "user",
"content": "this is a test request, acknowledge that you got it"
"content": "this is a test request, acknowledge that you got it",
}
],
stream=True
stream=True,
)
for chunk in response:
print(f'LiteLLM: streaming response from proxy {chunk}')
print(f"LiteLLM: streaming response from proxy {chunk}")
response = openai.ChatCompletion.create(
model="ollama/llama2",
messages = [
model="ollama/llama2",
messages=[
{
"role": "user",
"content": "this is a test request, acknowledge that you got it"
"content": "this is a test request, acknowledge that you got it",
}
]
],
)
print(f'LiteLLM: response from proxy {response}')
print(f"LiteLLM: response from proxy {response}")

View file

@@ -12,42 +12,51 @@ import pytest
from litellm import Router
import litellm
litellm.set_verbose=False
litellm.set_verbose = False
os.environ.pop("AZURE_AD_TOKEN")
model_list = [{ # list of model deployments
"model_name": "gpt-3.5-turbo", # model alias
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2", # actual model name
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
}
}, {
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
}
}, {
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
}
}]
model_list = [
{ # list of model deployments
"model_name": "gpt-3.5-turbo", # model alias
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2", # actual model name
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
},
},
]
router = Router(model_list=model_list)
file_paths = ["test_questions/question1.txt", "test_questions/question2.txt", "test_questions/question3.txt"]
file_paths = [
"test_questions/question1.txt",
"test_questions/question2.txt",
"test_questions/question3.txt",
]
questions = []
for file_path in file_paths:
try:
print(file_path)
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
content = file.read()
questions.append(content)
except FileNotFoundError as e:
@@ -59,10 +68,9 @@ for file_path in file_paths:
# print(q)
# make X concurrent calls to litellm.completion(model=gpt-35-turbo, messages=[]), pick a random question in questions array.
# Allow me to tune X concurrent calls.. Log question, output/exception, response time somewhere
# show me a summary of requests made, success full calls, failed calls. For failed calls show me the exceptions
# Allow me to tune X concurrent calls.. Log question, output/exception, response time somewhere
# show me a summary of requests made, success full calls, failed calls. For failed calls show me the exceptions
import concurrent.futures
import random
@@ -74,10 +82,18 @@ def make_openai_completion(question):
try:
start_time = time.time()
import openai
client = openai.OpenAI(api_key=os.environ['OPENAI_API_KEY'], base_url="http://0.0.0.0:8000") #base_url="http://0.0.0.0:8000",
client = openai.OpenAI(
api_key=os.environ["OPENAI_API_KEY"], base_url="http://0.0.0.0:8000"
) # base_url="http://0.0.0.0:8000",
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages=[{"role": "system", "content": f"You are a helpful assistant. Answer this question{question}"}],
messages=[
{
"role": "system",
"content": f"You are a helpful assistant. Answer this question{question}",
}
],
)
print(response)
end_time = time.time()
@@ -92,11 +108,10 @@ def make_openai_completion(question):
except Exception as e:
# Log exceptions for failed calls
with open("error_log.txt", "a") as error_log_file:
error_log_file.write(
f"Question: {question[:100]}\nException: {str(e)}\n\n"
)
error_log_file.write(f"Question: {question[:100]}\nException: {str(e)}\n\n")
return None
# Number of concurrent calls (you can adjust this)
concurrent_calls = 100
@@ -133,4 +148,3 @@ with open("request_log.txt", "r") as log_file:
with open("error_log.txt", "r") as error_log_file:
print("\nError Log:\n", error_log_file.read())

View file

@@ -12,42 +12,51 @@ import pytest
from litellm import Router
import litellm
litellm.set_verbose=False
litellm.set_verbose = False
# os.environ.pop("AZURE_AD_TOKEN")
model_list = [{ # list of model deployments
"model_name": "gpt-3.5-turbo", # model alias
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2", # actual model name
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
}
}, {
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
}
}, {
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
}
}]
model_list = [
{ # list of model deployments
"model_name": "gpt-3.5-turbo", # model alias
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2", # actual model name
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
},
},
]
router = Router(model_list=model_list)
file_paths = ["test_questions/question1.txt", "test_questions/question2.txt", "test_questions/question3.txt"]
file_paths = [
"test_questions/question1.txt",
"test_questions/question2.txt",
"test_questions/question3.txt",
]
questions = []
for file_path in file_paths:
try:
print(file_path)
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
content = file.read()
questions.append(content)
except FileNotFoundError as e:
@@ -59,10 +68,9 @@ for file_path in file_paths:
# print(q)
# make X concurrent calls to litellm.completion(model=gpt-35-turbo, messages=[]), pick a random question in questions array.
# Allow me to tune X concurrent calls.. Log question, output/exception, response time somewhere
# show me a summary of requests made, success full calls, failed calls. For failed calls show me the exceptions
# Allow me to tune X concurrent calls.. Log question, output/exception, response time somewhere
# show me a summary of requests made, success full calls, failed calls. For failed calls show me the exceptions
import concurrent.futures
import random
@@ -76,9 +84,12 @@ def make_openai_completion(question):
import requests
data = {
'model': 'gpt-3.5-turbo',
'messages': [
{'role': 'system', 'content': f'You are a helpful assistant. Answer this question{question}'},
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": f"You are a helpful assistant. Answer this question{question}",
},
],
}
response = requests.post("http://0.0.0.0:8000/queue/request", json=data)
@@ -89,8 +100,8 @@ def make_openai_completion(question):
log_file.write(
f"Question: {question[:100]}\nResponse ID: {response.get('id', 'N/A')} Url: {response.get('url', 'N/A')}\nTime: {end_time - start_time:.2f} seconds\n\n"
)
# polling the url
# polling the url
while True:
try:
url = response["url"]
@@ -107,7 +118,9 @@ def make_openai_completion(question):
)
break
print(f"POLLING JOB{polling_url}\nSTATUS: {status}, \n Response {polling_response}")
print(
f"POLLING JOB{polling_url}\nSTATUS: {status}, \n Response {polling_response}"
)
time.sleep(0.5)
except Exception as e:
print("got exception in polling", e)
@@ -117,11 +130,10 @@ def make_openai_completion(question):
except Exception as e:
# Log exceptions for failed calls
with open("error_log.txt", "a") as error_log_file:
error_log_file.write(
f"Question: {question[:100]}\nException: {str(e)}\n\n"
)
error_log_file.write(f"Question: {question[:100]}\nException: {str(e)}\n\n")
return None
# Number of concurrent calls (you can adjust this)
concurrent_calls = 10
@@ -142,7 +154,7 @@ successful_calls = 0
failed_calls = 0
for future in futures:
if future.done():
if future.done():
if future.result() is not None:
successful_calls += 1
else:
@@ -152,4 +164,3 @@ print(f"Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")

View file

@@ -12,42 +12,51 @@ import pytest
from litellm import Router
import litellm
litellm.set_verbose=False
litellm.set_verbose = False
os.environ.pop("AZURE_AD_TOKEN")
model_list = [{ # list of model deployments
"model_name": "gpt-3.5-turbo", # model alias
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2", # actual model name
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
}
}, {
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
}
}, {
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
}
}]
model_list = [
{ # list of model deployments
"model_name": "gpt-3.5-turbo", # model alias
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2", # actual model name
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
},
},
]
router = Router(model_list=model_list)
file_paths = ["test_questions/question1.txt", "test_questions/question2.txt", "test_questions/question3.txt"]
file_paths = [
"test_questions/question1.txt",
"test_questions/question2.txt",
"test_questions/question3.txt",
]
questions = []
for file_path in file_paths:
try:
print(file_path)
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
content = file.read()
questions.append(content)
except FileNotFoundError as e:
@@ -59,10 +68,9 @@ for file_path in file_paths:
# print(q)
# make X concurrent calls to litellm.completion(model=gpt-35-turbo, messages=[]), pick a random question in questions array.
# Allow me to tune X concurrent calls.. Log question, output/exception, response time somewhere
# show me a summary of requests made, success full calls, failed calls. For failed calls show me the exceptions
# Allow me to tune X concurrent calls.. Log question, output/exception, response time somewhere
# show me a summary of requests made, success full calls, failed calls. For failed calls show me the exceptions
import concurrent.futures
import random
@@ -75,7 +83,12 @@ def make_openai_completion(question):
start_time = time.time()
response = router.completion(
model="gpt-3.5-turbo",
messages=[{"role": "system", "content": f"You are a helpful assistant. Answer this question{question}"}],
messages=[
{
"role": "system",
"content": f"You are a helpful assistant. Answer this question{question}",
}
],
)
print(response)
end_time = time.time()
@@ -90,11 +103,10 @@ def make_openai_completion(question):
except Exception as e:
# Log exceptions for failed calls
with open("error_log.txt", "a") as error_log_file:
error_log_file.write(
f"Question: {question[:100]}\nException: {str(e)}\n\n"
)
error_log_file.write(f"Question: {question[:100]}\nException: {str(e)}\n\n")
return None
# Number of concurrent calls (you can adjust this)
concurrent_calls = 150
@@ -131,4 +143,3 @@ print(f"Load test Summary:")
with open("error_log.txt", "r") as error_log_file:
print("\nError Log:\n", error_log_file.read())