forked from phoenix/litellm-mirror
refactor: add black formatting
parent b87d630b0a
commit 4905929de3
156 changed files with 19723 additions and 10869 deletions
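The changes below are mechanical: Black normalizes string quotes to double quotes and re-wraps lines longer than its default 88-character limit. A minimal before/after sketch of the pattern, using lines drawn from the benchmark script in this diff (illustrative only, not an additional change):

# before black
models = ['gpt-3.5-turbo', 'claude-2']
colored_description = colored(f"Running question: {question} for model: {model}", 'green')

# after running `black .`
models = ["gpt-3.5-turbo", "claude-2"]
colored_description = colored(
    f"Running question: {question} for model: {model}", "green"
)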
@@ -9,33 +9,37 @@ import os

# Define the list of models to benchmark
# select any LLM listed here: https://docs.litellm.ai/docs/providers
models = ['gpt-3.5-turbo', 'claude-2']
models = ["gpt-3.5-turbo", "claude-2"]

# Enter LLM API keys
# https://docs.litellm.ai/docs/providers
os.environ['OPENAI_API_KEY'] = ""
os.environ['ANTHROPIC_API_KEY'] = ""
os.environ["OPENAI_API_KEY"] = ""
os.environ["ANTHROPIC_API_KEY"] = ""

# List of questions to benchmark (replace with your questions)
questions = [
"When will BerriAI IPO?",
"When will LiteLLM hit $100M ARR?"
]
questions = ["When will BerriAI IPO?", "When will LiteLLM hit $100M ARR?"]

# Enter your system prompt here
# Enter your system prompt here
system_prompt = """
You are LiteLLMs helpful assistant
"""


@click.command()
@click.option('--system-prompt', default="You are a helpful assistant that can answer questions.", help="System prompt for the conversation.")
@click.option(
"--system-prompt",
default="You are a helpful assistant that can answer questions.",
help="System prompt for the conversation.",
)
def main(system_prompt):
for question in questions:
data = [] # Data for the current question

with tqdm(total=len(models)) as pbar:
for model in models:
colored_description = colored(f"Running question: {question} for model: {model}", 'green')
colored_description = colored(
f"Running question: {question} for model: {model}", "green"
)
pbar.set_description(colored_description)
start_time = time.time()
@@ -44,35 +48,43 @@ def main(system_prompt):
max_tokens=500,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": question}
{"role": "user", "content": question},
],
)

end = time.time()
total_time = end - start_time
cost = completion_cost(completion_response=response)
raw_response = response['choices'][0]['message']['content']
raw_response = response["choices"][0]["message"]["content"]

data.append({
'Model': colored(model, 'light_blue'),
'Response': raw_response, # Colorize the response
'ResponseTime': colored(f"{total_time:.2f} seconds", "red"),
'Cost': colored(f"${cost:.6f}", 'green'), # Colorize the cost
})
data.append(
{
"Model": colored(model, "light_blue"),
"Response": raw_response, # Colorize the response
"ResponseTime": colored(f"{total_time:.2f} seconds", "red"),
"Cost": colored(f"${cost:.6f}", "green"), # Colorize the cost
}
)

pbar.update(1)

# Separate headers from the data
headers = ['Model', 'Response', 'Response Time (seconds)', 'Cost ($)']
headers = ["Model", "Response", "Response Time (seconds)", "Cost ($)"]
colwidths = [15, 80, 15, 10]

# Create a nicely formatted table for the current question
table = tabulate([list(d.values()) for d in data], headers, tablefmt="grid", maxcolwidths=colwidths)

table = tabulate(
[list(d.values()) for d in data],
headers,
tablefmt="grid",
maxcolwidths=colwidths,
)

# Print the table for the current question
colored_question = colored(question, 'green')
colored_question = colored(question, "green")
click.echo(f"\nBenchmark Results for '{colored_question}':")
click.echo(table) # Display the formatted table

if __name__ == '__main__':

if __name__ == "__main__":
main()
@@ -1,25 +1,22 @@
import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()

import litellm
from litellm import embedding, completion, completion_cost

from autoevals.llm import *

###################
import litellm

# litellm completion call
question = "which country has the highest population"
response = litellm.completion(
model = "gpt-3.5-turbo",
messages = [
{
"role": "user",
"content": question
}
],
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": question}],
)
print(response)
# use the auto eval Factuality() evaluator
@@ -27,9 +24,11 @@ print(response)
print("calling evaluator")
evaluator = Factuality()
result = evaluator(
output=response.choices[0]["message"]["content"], # response from litellm.completion()
expected="India", # expected output
input=question # question passed to litellm.completion
output=response.choices[0]["message"][
"content"
], # response from litellm.completion()
expected="India", # expected output
input=question, # question passed to litellm.completion
)

print(result)
print(result)
@@ -4,9 +4,10 @@ from flask_cors import CORS
import traceback
import litellm
from util import handle_error
from litellm import completion
import os, dotenv, time
from litellm import completion
import os, dotenv, time
import json

dotenv.load_dotenv()

# TODO: set your keys in .env or here:
@@ -19,57 +20,72 @@ verbose = True

# litellm.caching_with_models = True # CACHING: caching_with_models Keys in the cache are messages + model. - to learn more: https://docs.litellm.ai/docs/caching/
######### PROMPT LOGGING ##########
os.environ["PROMPTLAYER_API_KEY"] = "" # set your promptlayer key here - https://promptlayer.com/
os.environ[
"PROMPTLAYER_API_KEY"
] = "" # set your promptlayer key here - https://promptlayer.com/

# set callbacks
litellm.success_callback = ["promptlayer"]
############ HELPER FUNCTIONS ###################################


def print_verbose(print_statement):
if verbose:
print(print_statement)


app = Flask(__name__)
CORS(app)

@app.route('/')

@app.route("/")
def index():
return 'received!', 200
return "received!", 200


def data_generator(response):
for chunk in response:
yield f"data: {json.dumps(chunk)}\n\n"

@app.route('/chat/completions', methods=["POST"])

@app.route("/chat/completions", methods=["POST"])
def api_completion():
data = request.json
start_time = time.time()
if data.get('stream') == "True":
data['stream'] = True # convert to boolean
start_time = time.time()
if data.get("stream") == "True":
data["stream"] = True # convert to boolean
try:
if "prompt" not in data:
raise ValueError("data needs to have prompt")
data["model"] = "togethercomputer/CodeLlama-34b-Instruct" # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
data[
"model"
] = "togethercomputer/CodeLlama-34b-Instruct" # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
# COMPLETION CALL
system_prompt = "Only respond to questions about code. Say 'I don't know' to anything outside of that."
messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": data.pop("prompt")}]
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": data.pop("prompt")},
]
data["messages"] = messages
print(f"data: {data}")
response = completion(**data)
## LOG SUCCESS
end_time = time.time()
if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
return Response(data_generator(response), mimetype='text/event-stream')
end_time = time.time()
if (
"stream" in data and data["stream"] == True
): # use generate_responses to stream responses
return Response(data_generator(response), mimetype="text/event-stream")
except Exception as e:
# call handle_error function
print_verbose(f"Got Error api_completion(): {traceback.format_exc()}")
## LOG FAILURE
end_time = time.time()
end_time = time.time()
traceback_exception = traceback.format_exc()
return handle_error(data=data)
return response

@app.route('/get_models', methods=["POST"])

@app.route("/get_models", methods=["POST"])
def get_models():
try:
return litellm.model_list
@@ -78,7 +94,8 @@ def get_models():
response = {"error": str(e)}
return response, 200

if __name__ == "__main__":
from waitress import serve
serve(app, host="0.0.0.0", port=4000, threads=500)

if __name__ == "__main__":
from waitress import serve

serve(app, host="0.0.0.0", port=4000, threads=500)
@@ -3,27 +3,28 @@ from urllib.parse import urlparse, parse_qs


def get_next_url(response):
"""
"""
Function to get 'next' url from Link header
:param response: response from requests
:return: next url or None
"""
if 'link' not in response.headers:
return None
headers = response.headers
if "link" not in response.headers:
return None
headers = response.headers

next_url = headers['Link']
print(next_url)
start_index = next_url.find("<")
end_index = next_url.find(">")
next_url = headers["Link"]
print(next_url)
start_index = next_url.find("<")
end_index = next_url.find(">")

return next_url[1:end_index]

return next_url[1:end_index]

def get_models(url):
"""
Function to retrieve all models from paginated endpoint
:param url: base url to make GET request
:return: list of all models
Function to retrieve all models from paginated endpoint
:param url: base url to make GET request
:return: list of all models
"""
models = []
while url:
@@ -36,19 +37,21 @@ def get_models(url):
models.extend(payload)
return models


def get_cleaned_models(models):
"""
Function to clean retrieved models
:param models: list of retrieved models
:return: list of cleaned models
Function to clean retrieved models
:param models: list of retrieved models
:return: list of cleaned models
"""
cleaned_models = []
for model in models:
cleaned_models.append(model["id"])
return cleaned_models


# Get text-generation models
url = 'https://huggingface.co/api/models?filter=text-generation-inference'
url = "https://huggingface.co/api/models?filter=text-generation-inference"
text_generation_models = get_models(url)
cleaned_text_generation_models = get_cleaned_models(text_generation_models)
@@ -56,7 +59,7 @@ print(cleaned_text_generation_models)


# Get conversational models
url = 'https://huggingface.co/api/models?filter=conversational'
url = "https://huggingface.co/api/models?filter=conversational"
conversational_models = get_models(url)
cleaned_conversational_models = get_cleaned_models(conversational_models)
@@ -65,19 +68,23 @@ print(cleaned_conversational_models)

def write_to_txt(cleaned_models, filename):
"""
Function to write the contents of a list to a text file
:param cleaned_models: list of cleaned models
:param filename: name of the text file
Function to write the contents of a list to a text file
:param cleaned_models: list of cleaned models
:param filename: name of the text file
"""
with open(filename, 'w') as f:
with open(filename, "w") as f:
for item in cleaned_models:
f.write("%s\n" % item)


# Write contents of cleaned_text_generation_models to text_generation_models.txt
write_to_txt(cleaned_text_generation_models, 'huggingface_llms_metadata/hf_text_generation_models.txt')
write_to_txt(
cleaned_text_generation_models,
"huggingface_llms_metadata/hf_text_generation_models.txt",
)

# Write contents of cleaned_conversational_models to conversational_models.txt
write_to_txt(cleaned_conversational_models, 'huggingface_llms_metadata/hf_conversational_models.txt')


write_to_txt(
cleaned_conversational_models,
"huggingface_llms_metadata/hf_conversational_models.txt",
)
@@ -1,4 +1,3 @@

import openai

api_base = f"http://0.0.0.0:8000"
@@ -8,29 +7,29 @@ openai.api_key = "temp-key"
print(openai.api_base)


print(f'LiteLLM: response from proxy with streaming')
print(f"LiteLLM: response from proxy with streaming")
response = openai.ChatCompletion.create(
model="ollama/llama2",
messages = [
model="ollama/llama2",
messages=[
{
"role": "user",
"content": "this is a test request, acknowledge that you got it"
"content": "this is a test request, acknowledge that you got it",
}
],
stream=True
stream=True,
)

for chunk in response:
print(f'LiteLLM: streaming response from proxy {chunk}')
print(f"LiteLLM: streaming response from proxy {chunk}")

response = openai.ChatCompletion.create(
model="ollama/llama2",
messages = [
model="ollama/llama2",
messages=[
{
"role": "user",
"content": "this is a test request, acknowledge that you got it"
"content": "this is a test request, acknowledge that you got it",
}
]
],
)

print(f'LiteLLM: response from proxy {response}')
print(f"LiteLLM: response from proxy {response}")
@@ -12,42 +12,51 @@ import pytest

from litellm import Router
import litellm
litellm.set_verbose=False

litellm.set_verbose = False
os.environ.pop("AZURE_AD_TOKEN")

model_list = [{ # list of model deployments
"model_name": "gpt-3.5-turbo", # model alias
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2", # actual model name
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
}
}, {
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
}
}, {
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
}
}]
model_list = [
{ # list of model deployments
"model_name": "gpt-3.5-turbo", # model alias
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2", # actual model name
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
},
},
]
router = Router(model_list=model_list)


file_paths = ["test_questions/question1.txt", "test_questions/question2.txt", "test_questions/question3.txt"]
file_paths = [
"test_questions/question1.txt",
"test_questions/question2.txt",
"test_questions/question3.txt",
]
questions = []

for file_path in file_paths:
try:
print(file_path)
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
content = file.read()
questions.append(content)
except FileNotFoundError as e:
@@ -59,10 +68,9 @@ for file_path in file_paths:
# print(q)



# make X concurrent calls to litellm.completion(model=gpt-35-turbo, messages=[]), pick a random question in questions array.
# Allow me to tune X concurrent calls.. Log question, output/exception, response time somewhere
# show me a summary of requests made, success full calls, failed calls. For failed calls show me the exceptions
# Allow me to tune X concurrent calls.. Log question, output/exception, response time somewhere
# show me a summary of requests made, success full calls, failed calls. For failed calls show me the exceptions

import concurrent.futures
import random
@@ -74,10 +82,18 @@ def make_openai_completion(question):
try:
start_time = time.time()
import openai
client = openai.OpenAI(api_key=os.environ['OPENAI_API_KEY'], base_url="http://0.0.0.0:8000") #base_url="http://0.0.0.0:8000",

client = openai.OpenAI(
api_key=os.environ["OPENAI_API_KEY"], base_url="http://0.0.0.0:8000"
) # base_url="http://0.0.0.0:8000",
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages=[{"role": "system", "content": f"You are a helpful assistant. Answer this question{question}"}],
messages=[
{
"role": "system",
"content": f"You are a helpful assistant. Answer this question{question}",
}
],
)
print(response)
end_time = time.time()
@@ -92,11 +108,10 @@ def make_openai_completion(question):
except Exception as e:
# Log exceptions for failed calls
with open("error_log.txt", "a") as error_log_file:
error_log_file.write(
f"Question: {question[:100]}\nException: {str(e)}\n\n"
)
error_log_file.write(f"Question: {question[:100]}\nException: {str(e)}\n\n")
return None


# Number of concurrent calls (you can adjust this)
concurrent_calls = 100
@@ -133,4 +148,3 @@ with open("request_log.txt", "r") as log_file:

with open("error_log.txt", "r") as error_log_file:
print("\nError Log:\n", error_log_file.read())
@@ -12,42 +12,51 @@ import pytest

from litellm import Router
import litellm
litellm.set_verbose=False

litellm.set_verbose = False
# os.environ.pop("AZURE_AD_TOKEN")

model_list = [{ # list of model deployments
"model_name": "gpt-3.5-turbo", # model alias
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2", # actual model name
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
}
}, {
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
}
}, {
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
}
}]
model_list = [
{ # list of model deployments
"model_name": "gpt-3.5-turbo", # model alias
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2", # actual model name
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
},
},
]
router = Router(model_list=model_list)


file_paths = ["test_questions/question1.txt", "test_questions/question2.txt", "test_questions/question3.txt"]
file_paths = [
"test_questions/question1.txt",
"test_questions/question2.txt",
"test_questions/question3.txt",
]
questions = []

for file_path in file_paths:
try:
print(file_path)
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
content = file.read()
questions.append(content)
except FileNotFoundError as e:
@@ -59,10 +68,9 @@ for file_path in file_paths:
# print(q)



# make X concurrent calls to litellm.completion(model=gpt-35-turbo, messages=[]), pick a random question in questions array.
# Allow me to tune X concurrent calls.. Log question, output/exception, response time somewhere
# show me a summary of requests made, success full calls, failed calls. For failed calls show me the exceptions
# Allow me to tune X concurrent calls.. Log question, output/exception, response time somewhere
# show me a summary of requests made, success full calls, failed calls. For failed calls show me the exceptions

import concurrent.futures
import random
@@ -76,9 +84,12 @@ def make_openai_completion(question):
import requests

data = {
'model': 'gpt-3.5-turbo',
'messages': [
{'role': 'system', 'content': f'You are a helpful assistant. Answer this question{question}'},
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": f"You are a helpful assistant. Answer this question{question}",
},
],
}
response = requests.post("http://0.0.0.0:8000/queue/request", json=data)
@@ -89,8 +100,8 @@ def make_openai_completion(question):
log_file.write(
f"Question: {question[:100]}\nResponse ID: {response.get('id', 'N/A')} Url: {response.get('url', 'N/A')}\nTime: {end_time - start_time:.2f} seconds\n\n"
)

# polling the url

# polling the url
while True:
try:
url = response["url"]
@@ -107,7 +118,9 @@ def make_openai_completion(question):
)

break
print(f"POLLING JOB{polling_url}\nSTATUS: {status}, \n Response {polling_response}")
print(
f"POLLING JOB{polling_url}\nSTATUS: {status}, \n Response {polling_response}"
)
time.sleep(0.5)
except Exception as e:
print("got exception in polling", e)
@@ -117,11 +130,10 @@ def make_openai_completion(question):
except Exception as e:
# Log exceptions for failed calls
with open("error_log.txt", "a") as error_log_file:
error_log_file.write(
f"Question: {question[:100]}\nException: {str(e)}\n\n"
)
error_log_file.write(f"Question: {question[:100]}\nException: {str(e)}\n\n")
return None


# Number of concurrent calls (you can adjust this)
concurrent_calls = 10
@@ -142,7 +154,7 @@ successful_calls = 0
failed_calls = 0

for future in futures:
if future.done():
if future.done():
if future.result() is not None:
successful_calls += 1
else:
@@ -152,4 +164,3 @@ print(f"Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")
@@ -12,42 +12,51 @@ import pytest

from litellm import Router
import litellm
litellm.set_verbose=False

litellm.set_verbose = False
os.environ.pop("AZURE_AD_TOKEN")

model_list = [{ # list of model deployments
"model_name": "gpt-3.5-turbo", # model alias
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2", # actual model name
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
}
}, {
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE")
}
}, {
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
}
}]
model_list = [
{ # list of model deployments
"model_name": "gpt-3.5-turbo", # model alias
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2", # actual model name
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-functioncalling",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": { # params for litellm completion/embedding call
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
},
},
]
router = Router(model_list=model_list)


file_paths = ["test_questions/question1.txt", "test_questions/question2.txt", "test_questions/question3.txt"]
file_paths = [
"test_questions/question1.txt",
"test_questions/question2.txt",
"test_questions/question3.txt",
]
questions = []

for file_path in file_paths:
try:
print(file_path)
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
content = file.read()
questions.append(content)
except FileNotFoundError as e:
@@ -59,10 +68,9 @@ for file_path in file_paths:
# print(q)



# make X concurrent calls to litellm.completion(model=gpt-35-turbo, messages=[]), pick a random question in questions array.
# Allow me to tune X concurrent calls.. Log question, output/exception, response time somewhere
# show me a summary of requests made, success full calls, failed calls. For failed calls show me the exceptions
# Allow me to tune X concurrent calls.. Log question, output/exception, response time somewhere
# show me a summary of requests made, success full calls, failed calls. For failed calls show me the exceptions

import concurrent.futures
import random
@@ -75,7 +83,12 @@ def make_openai_completion(question):
start_time = time.time()
response = router.completion(
model="gpt-3.5-turbo",
messages=[{"role": "system", "content": f"You are a helpful assistant. Answer this question{question}"}],
messages=[
{
"role": "system",
"content": f"You are a helpful assistant. Answer this question{question}",
}
],
)
print(response)
end_time = time.time()
@@ -90,11 +103,10 @@ def make_openai_completion(question):
except Exception as e:
# Log exceptions for failed calls
with open("error_log.txt", "a") as error_log_file:
error_log_file.write(
f"Question: {question[:100]}\nException: {str(e)}\n\n"
)
error_log_file.write(f"Question: {question[:100]}\nException: {str(e)}\n\n")
return None


# Number of concurrent calls (you can adjust this)
concurrent_calls = 150
@@ -131,4 +143,3 @@ with open("request_log.txt", "r") as log_file:

with open("error_log.txt", "r") as error_log_file:
print("\nError Log:\n", error_log_file.read())