import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()
import os, io

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest
from openai import Timeout
import litellm
from litellm import embedding, completion, completion_cost
from litellm import RateLimitError

litellm.num_retries = 3
litellm.cache = None
user_message = "Write a short poem about the sky"
messages = [{"content": user_message, "role": "user"}]

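# Shared test setup above: retry transient failures, disable the response cache,
# and reuse one simple single-turn conversation across the tests below.
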
def logger_fn(user_model_dict):
    print(f"user_model_dict: {user_model_dict}")


def test_completion_custom_provider_model_name():
    try:
        litellm.cache = None
        response = completion(
            model="together_ai/togethercomputer/llama-2-70b-chat",
            messages=messages,
            logger_fn=logger_fn,
        )
        # Add any assertions here to check the response
        print(response)
        print(response["choices"][0]["finish_reason"])
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_custom_provider_model_name()

def test_completion_claude():
    litellm.set_verbose = False
    litellm.cache = None
    litellm.AnthropicConfig(max_tokens_to_sample=200, metadata={"user_id": "1224"})
    try:
        # test without max_tokens
        response = completion(
            model="claude-instant-1", messages=messages, request_timeout=10,
        )
        # Add any assertions here to check the response
        print(response)
        print(response.usage)
        print(response.usage.completion_tokens)
        print(response["usage"]["completion_tokens"])
        # print("new cost tracking")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


test_completion_claude()

# def test_completion_oobabooga():
#     try:
#         response = completion(
#             model="oobabooga/vicuna-1.3b", messages=messages, api_base="http://127.0.0.1:5000"
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_oobabooga()

# aleph alpha
# def test_completion_aleph_alpha():
#     try:
#         response = completion(
#             model="luminous-base", messages=messages, logger_fn=logger_fn
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test_completion_aleph_alpha()


# def test_completion_aleph_alpha_control_models():
#     try:
#         response = completion(
#             model="luminous-base-control", messages=messages, logger_fn=logger_fn
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test_completion_aleph_alpha_control_models()

import openai


def test_completion_gpt4_turbo():
    try:
        response = completion(
            model="gpt-4-1106-preview",
            messages=messages,
            max_tokens=10,
        )
        print(response)
    except openai.RateLimitError:
        print("got a rate limit error")
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_gpt4_turbo()

def test_completion_gpt4_vision():
    try:
        litellm.set_verbose = True
        response = completion(
            model="gpt-4-vision-preview",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What's in this image?"},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
                            },
                        },
                    ],
                }
            ],
        )
        print(response)
    except openai.RateLimitError:
        print("got a rate limit error")
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_gpt4_vision()

def test_completion_perplexity_api():
    try:
        # litellm.set_verbose = True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {"role": "user", "content": "Hey"},
            {"role": "user", "content": "Hey"},
        ]
        response = completion(
            model="mistral-7b-instruct",
            messages=messages,
            api_base="https://api.perplexity.ai",
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_perplexity_api()


def test_completion_perplexity_api_2():
    try:
        # litellm.set_verbose = True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {"role": "user", "content": "Hey"},
            {"role": "user", "content": "Hey"},
        ]
        response = completion(
            model="perplexity/mistral-7b-instruct",
            messages=messages
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_perplexity_api_2()

# commenting out as this is a flaky test on circle ci
# def test_completion_nlp_cloud():
#     try:
#         messages = [
#             {"role": "system", "content": "You are a helpful assistant."},
#             {
#                 "role": "user",
#                 "content": "how does a court case get to the Supreme Court?",
#             },
#         ]
#         response = completion(model="dolphin", messages=messages, logger_fn=logger_fn)
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_nlp_cloud()

######### HUGGING FACE TESTS ########################
#####################################################
"""
HF Tests we should pass
- TGI:
    - Pro Inference API
    - Deployed Endpoint
- Conversational
    - Free Inference API
    - Deployed Endpoint
- Neither TGI nor Conversational
    - Free Inference API
    - Deployed Endpoint
"""
#####################################################
#####################################################
# Test util to sort models to TGI, conv, None
def test_get_hf_task_for_model():
    model = "glaiveai/glaive-coder-7b"
    model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model)
    print(f"model:{model}, model type: {model_type}")
    assert model_type == "text-generation-inference"

    model = "meta-llama/Llama-2-7b-hf"
    model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model)
    print(f"model:{model}, model type: {model_type}")
    assert model_type == "text-generation-inference"

    model = "facebook/blenderbot-400M-distill"
    model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model)
    print(f"model:{model}, model type: {model_type}")
    assert model_type == "conversational"

    model = "facebook/blenderbot-3B"
    model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model)
    print(f"model:{model}, model type: {model_type}")
    assert model_type == "conversational"

    # neither TGI nor conversational -> no task
    model = "roneneldan/TinyStories-3M"
    model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model)
    print(f"model:{model}, model type: {model_type}")
    assert model_type is None


# test_get_hf_task_for_model()

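# A minimal sketch (not litellm's internal routing) of how the task returned by
# get_hf_task_for_model could drive request formatting; the handler names below
# are hypothetical.
def route_hf_request(model: str) -> str:
    task = litellm.llms.huggingface_restapi.get_hf_task_for_model(model)
    if task == "text-generation-inference":
        return "tgi"  # TGI-style /generate payload
    elif task == "conversational":
        return "conversational"  # past_user_inputs / generated_responses payload
    return "default"  # plain Inference API text-generation payload
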
# litellm.set_verbose=False
# ################### Hugging Face TGI models ########################
# # TGI model
# # this is a TGI model https://huggingface.co/glaiveai/glaive-coder-7b
def hf_test_completion_tgi():
    # litellm.set_verbose=True
    try:
        response = completion(
            model="huggingface/HuggingFaceH4/zephyr-7b-beta",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# hf_test_completion_tgi()


def hf_test_completion_tgi_stream():
    try:
        response = completion(
            model="huggingface/HuggingFaceH4/zephyr-7b-beta",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
            stream=True
        )
        # Add any assertions here to check the response
        print(response)
        for chunk in response:
            print(chunk["choices"][0]["delta"]["content"])
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# hf_test_completion_tgi_stream()

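# A small helper sketch for collecting a streamed response into a single string,
# using the same chunk["choices"][0]["delta"] access pattern as the test above.
def collect_stream_content(response) -> str:
    pieces = []
    for chunk in response:
        pieces.append(chunk["choices"][0]["delta"].get("content", "") or "")
    return "".join(pieces)
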
# ################### Hugging Face Conversational models ########################
# def hf_test_completion_conv():
#     try:
#         response = litellm.completion(
#             model="huggingface/facebook/blenderbot-3B",
#             messages=[{"content": "Hello, how are you?", "role": "user"}],
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# hf_test_completion_conv()

# ################### Hugging Face Neither TGI nor Conversational models ########################
# # Neither TGI nor Conversational
# def hf_test_completion_none_task():
#     try:
#         user_message = "My name is Merve and my favorite"
#         messages = [{"content": user_message, "role": "user"}]
#         response = completion(
#             model="huggingface/roneneldan/TinyStories-3M",
#             messages=messages,
#             api_base="https://p69xlsj6rpno5drq.us-east-1.aws.endpoints.huggingface.cloud",
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# hf_test_completion_none_task()
########################### End of Hugging Face Tests ##############################################
# def test_completion_hf_api():
#     # failing on circle ci, commenting out
#     try:
#         user_message = "write some code to find the sum of two numbers"
#         messages = [{"content": user_message, "role": "user"}]
#         api_base = "https://a8l9e3ucxinyl3oj.us-east-1.aws.endpoints.huggingface.cloud"
#         response = completion(model="huggingface/meta-llama/Llama-2-7b-chat-hf", messages=messages, api_base=api_base)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         if "loading" in str(e):
#             pass
#         else:
#             pytest.fail(f"Error occurred: {e}")

# test_completion_hf_api()

# def test_completion_hf_api_best_of():
#     # failing on circle ci, commenting out
#     try:
#         user_message = "write some code to find the sum of two numbers"
#         messages = [{"content": user_message, "role": "user"}]
#         api_base = "https://a8l9e3ucxinyl3oj.us-east-1.aws.endpoints.huggingface.cloud"
#         response = completion(model="huggingface/meta-llama/Llama-2-7b-chat-hf", messages=messages, api_base=api_base, n=2)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         if "loading" in str(e):
#             pass
#         else:
#             pytest.fail(f"Error occurred: {e}")

# test_completion_hf_api_best_of()

# def test_completion_hf_deployed_api():
#     try:
#         user_message = "There's a llama in my garden 😱 What should I do?"
#         messages = [{"content": user_message, "role": "user"}]
#         response = completion(model="huggingface/https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", messages=messages, logger_fn=logger_fn)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")


# this should throw an exception, to trigger https://logs.litellm.ai/
# def hf_test_error_logs():
#     try:
#         litellm.set_verbose = True
#         user_message = "My name is Merve and my favorite"
#         messages = [{"content": user_message, "role": "user"}]
#         response = completion(
#             model="huggingface/roneneldan/TinyStories-3M",
#             messages=messages,
#             api_base="https://p69xlsj6rpno5drq.us-east-1.aws.endpoints.huggingface.cloud",
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# hf_test_error_logs()

def test_completion_cohere():  # commenting for now as the cohere endpoint is being flaky
    try:
        litellm.CohereConfig(max_tokens=1000, stop_sequences=["a"])
        response = completion(
            model="command-nightly",
            messages=messages,
            logger_fn=logger_fn
        )
        # Add any assertions here to check the response
        print(response)
        response_str = response["choices"][0]["message"]["content"]
        response_str_2 = response.choices[0].message.content
        if type(response_str) != str:
            pytest.fail("response content is not a string")
        if type(response_str_2) != str:
            pytest.fail("response content is not a string")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_cohere()

def test_completion_openai():
    try:
        litellm.set_verbose = True
        print(f"api key: {os.environ['OPENAI_API_KEY']}")
        litellm.api_key = os.environ["OPENAI_API_KEY"]
        response = completion(model="gpt-3.5-turbo", messages=messages, max_tokens=10, request_timeout=10)
        print("This is the response object\n", response)

        response_str = response["choices"][0]["message"]["content"]
        response_str_2 = response.choices[0].message.content

        cost = completion_cost(completion_response=response)
        print("Cost for completion call with gpt-3.5-turbo: ", f"${float(cost):.10f}")
        assert response_str == response_str_2
        assert type(response_str) == str
        assert len(response_str) > 1

        litellm.api_key = None
    except Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_openai()

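# Note: completion_cost() derives the dollar figure from the response's model and
# token usage, so cost assertions assume the model exists in litellm's cost map.
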
def test_completion_text_openai():
    try:
        # litellm.set_verbose = True
        response = completion(model="gpt-3.5-turbo-instruct", messages=messages)
        print(response["choices"][0]["message"]["content"])
    except Exception as e:
        print(e)
        pytest.fail(f"Error occurred: {e}")


# test_completion_text_openai()


def test_completion_openai_with_optional_params():
    try:
        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.5,
            top_p=0.1,
            user="ishaan_dev@berri.ai",
        )
        # Add any assertions here to check the response
        print(response)
    except Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

def test_completion_openai_litellm_key():
    try:
        litellm.api_key = os.environ["OPENAI_API_KEY"]

        # ensure the key is unset in the environment and in openai.api_key
        os.environ["OPENAI_API_KEY"] = ""
        import openai

        openai.api_key = ""
        ##########################################################

        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.5,
            top_p=0.1,
            max_tokens=10,
            user="ishaan_dev@berri.ai",
        )
        # Add any assertions here to check the response
        print(response)

        ###### reset environ key
        os.environ["OPENAI_API_KEY"] = litellm.api_key

        ##### unset litellm var
        litellm.api_key = None
    except Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_openai_litellm_key()

def test_completion_openrouter1():
    try:
        response = completion(
            model="openrouter/google/palm-2-chat-bison",
            messages=messages,
            max_tokens=5,
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_openrouter1()


def test_completion_openrouter2():
    try:
        print("testing openrouter/gpt-3.5-turbo")
        response = completion(
            model="openrouter/gpt-3.5-turbo",
            messages=messages,
            max_tokens=5,
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_openrouter2()


def test_completion_openrouter3():
    try:
        response = completion(
            model="openrouter/mistralai/mistral-7b-instruct",
            messages=messages,
            max_tokens=5,
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_openrouter3()

def test_completion_hf_model_no_provider():
    try:
        response = completion(
            model="WizardLM/WizardLM-70B-V1.0",
            messages=messages,
            max_tokens=5,
        )
        # this model has no provider prefix, so the call should raise
        print(response)
        pytest.fail("completion succeeded for a model with no provider")
    except Exception as e:
        pass


# test_completion_hf_model_no_provider()


def test_completion_hf_model_no_provider_2():
    try:
        response = completion(
            model="meta-llama/Llama-2-70b-chat-hf",
            messages=messages,
            max_tokens=5,
        )
        # this model has no provider prefix, so the call should raise
        pytest.fail("completion succeeded for a model with no provider")
    except Exception as e:
        pass


# test_completion_hf_model_no_provider_2()

def test_completion_openai_with_more_optional_params():
    try:
        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.5,
            top_p=0.1,
            n=2,
            max_tokens=150,
            presence_penalty=0.5,
            frequency_penalty=-0.5,
            logit_bias={123: 5},
            user="ishaan_dev@berri.ai",
        )
        # Add any assertions here to check the response
        print(response)
        response_str = response["choices"][0]["message"]["content"]
        response_str_2 = response.choices[0].message.content
        print(response["choices"][0]["message"]["content"])
        print(response.choices[0].message.content)
        if type(response_str) != str:
            pytest.fail("response content is not a string")
        if type(response_str_2) != str:
            pytest.fail("response content is not a string")
    except Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_openai_with_more_optional_params()

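######## Test Azure ########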
# def test_completion_openai_azure_with_functions():
#     function1 = [
#         {
#             "name": "get_current_weather",
#             "description": "Get the current weather in a given location",
#             "parameters": {
#                 "type": "object",
#                 "properties": {
#                     "location": {
#                         "type": "string",
#                         "description": "The city and state, e.g. San Francisco, CA",
#                     },
#                     "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
#                 },
#                 "required": ["location"],
#             },
#         }
#     ]
#     try:
#         response = completion(
#             model="azure/chatgpt-functioncalling", messages=messages, stream=True
#         )
#         # Add any assertions here to check the response
#         print(response)
#         for chunk in response:
#             print(chunk)
#             print(chunk["choices"][0]["finish_reason"])
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test_completion_openai_azure_with_functions()

def test_completion_azure():
    try:
        print("azure gpt-3.5 test\n\n")
        litellm.set_verbose = False
        ## Test azure call
        response = completion(
            model="azure/chatgpt-v-2",
            messages=messages,
        )
        ## Test azure flag for backwards compatibility
        response = completion(
            model="chatgpt-v-2",
            messages=messages,
            azure=True,
            max_tokens=10
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_azure()

def test_completion_azure2():
    # test if we can pass api_base, api_version and api_key in completion()
    try:
        print("azure gpt-3.5 test\n\n")
        litellm.set_verbose = False
        api_base = os.environ["AZURE_API_BASE"]
        api_key = os.environ["AZURE_API_KEY"]
        api_version = os.environ["AZURE_API_VERSION"]

        os.environ["AZURE_API_BASE"] = ""
        os.environ["AZURE_API_VERSION"] = ""
        os.environ["AZURE_API_KEY"] = ""

        ## Test azure call
        response = completion(
            model="azure/chatgpt-v-2",
            messages=messages,
            api_base=api_base,
            api_key=api_key,
            api_version=api_version,
            max_tokens=10,
        )

        # Add any assertions here to check the response
        print(response)

        os.environ["AZURE_API_BASE"] = api_base
        os.environ["AZURE_API_VERSION"] = api_version
        os.environ["AZURE_API_KEY"] = api_key

    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_azure2()


def test_completion_azure3():
    # test if we can pass api_base, api_version and api_key via litellm module vars
    try:
        print("azure gpt-3.5 test\n\n")
        litellm.set_verbose = True
        litellm.api_base = os.environ["AZURE_API_BASE"]
        litellm.api_key = os.environ["AZURE_API_KEY"]
        litellm.api_version = os.environ["AZURE_API_VERSION"]

        os.environ["AZURE_API_BASE"] = ""
        os.environ["AZURE_API_VERSION"] = ""
        os.environ["AZURE_API_KEY"] = ""

        ## Test azure call
        response = completion(
            model="azure/chatgpt-v-2",
            messages=messages,
            max_tokens=10,
        )

        # Add any assertions here to check the response
        print(response)

        os.environ["AZURE_API_BASE"] = litellm.api_base
        os.environ["AZURE_API_VERSION"] = litellm.api_version
        os.environ["AZURE_API_KEY"] = litellm.api_key

    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_azure3()

# new azure test for using litellm vars,
# use the following vars in this test and make an azure_api_call
# litellm.api_type = self.azure_api_type
# litellm.api_base = self.azure_api_base
# litellm.api_version = self.azure_api_version
# litellm.api_key = self.api_key
def test_completion_azure_with_litellm_key():
    try:
        print("azure gpt-3.5 test\n\n")
        import openai

        #### set litellm vars
        litellm.api_type = "azure"
        litellm.api_base = os.environ["AZURE_API_BASE"]
        litellm.api_version = os.environ["AZURE_API_VERSION"]
        litellm.api_key = os.environ["AZURE_API_KEY"]

        ######### UNSET ENV VARs for this ################
        os.environ["AZURE_API_BASE"] = ""
        os.environ["AZURE_API_VERSION"] = ""
        os.environ["AZURE_API_KEY"] = ""

        ######### UNSET OpenAI vars for this ##############
        openai.api_type = ""
        openai.api_base = "gm"
        openai.api_version = "333"
        openai.api_key = "ymca"

        response = completion(
            model="azure/chatgpt-v-2",
            messages=messages,
        )
        # Add any assertions here to check the response
        print(response)

        ######### RESET ENV VARs for this ################
        os.environ["AZURE_API_BASE"] = litellm.api_base
        os.environ["AZURE_API_VERSION"] = litellm.api_version
        os.environ["AZURE_API_KEY"] = litellm.api_key

        ######### UNSET litellm vars
        litellm.api_type = None
        litellm.api_base = None
        litellm.api_version = None
        litellm.api_key = None

    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_azure_with_litellm_key()

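# The azure tests above all repeat the same save -> blank out -> restore dance for
# env vars. A hedged sketch of that pattern as a reusable context manager (not a
# litellm utility, just an illustration):
from contextlib import contextmanager


@contextmanager
def blanked_env_vars(*names):
    saved = {name: os.environ.get(name, "") for name in names}
    try:
        for name in names:
            os.environ[name] = ""
        yield saved
    finally:
        for name, value in saved.items():
            os.environ[name] = value


# usage sketch:
# with blanked_env_vars("AZURE_API_BASE", "AZURE_API_VERSION", "AZURE_API_KEY") as saved:
#     completion(model="azure/chatgpt-v-2", messages=messages, api_base=saved["AZURE_API_BASE"],
#                api_key=saved["AZURE_API_KEY"], api_version=saved["AZURE_API_VERSION"])
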
def test_completion_azure_deployment_id():
    try:
        litellm.set_verbose = True
        response = completion(
            deployment_id="chatgpt-v-2",
            model="gpt-3.5-turbo",
            messages=messages,
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_azure_deployment_id()

# Only works for local endpoint
# def test_completion_anthropic_openai_proxy():
#     try:
#         response = completion(
#             model="custom_openai/claude-2",
#             messages=messages,
#             api_base="http://0.0.0.0:8000"
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_anthropic_openai_proxy()

def test_completion_replicate_vicuna():
    print("TESTING REPLICATE")
    litellm.set_verbose = False
    model_name = "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b"
    try:
        response = completion(
            model=model_name,
            messages=messages,
            temperature=0.5,
            top_k=20,
            repetition_penalty=1,
            min_tokens=1,
            seed=-1,
            max_tokens=20,
        )
        print(response)
        # Add any assertions here to check the response
        response_str = response["choices"][0]["message"]["content"]
        print("RESPONSE STRING\n", response_str)
        if type(response_str) != str:
            pytest.fail("response content is not a string")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_replicate_vicuna()

def test_completion_replicate_llama2_stream():
    print("TESTING REPLICATE streaming")
    litellm.set_verbose = False
    model_name = "replicate/meta/llama-2-7b-chat:13c3cdee13ee059ab779f0291d29054dab00a47dad8261375654de5540165fb0"
    try:
        response = completion(
            model=model_name,
            messages=[
                {
                    "role": "user",
                    "content": "what is yc write 1 paragraph",
                }
            ],
            stream=True,
            max_tokens=20,
            num_retries=3
        )
        print(response)
        # Add any assertions here to check the response
        for i, chunk in enumerate(response):
            if i == 0:
                assert len(chunk.choices[0].delta["content"]) > 5
            print(chunk)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_replicate_llama2_stream()

# commenting this out since we won't always be testing a custom replicate deployment
# def test_completion_replicate_deployments():
#     print("TESTING REPLICATE")
#     litellm.set_verbose = False
#     model_name = "replicate/deployments/ishaan-jaff/ishaan-mistral"
#     try:
#         response = completion(
#             model=model_name,
#             messages=messages,
#             temperature=0.5,
#             seed=-1,
#         )
#         print(response)
#         # Add any assertions here to check the response
#         response_str = response["choices"][0]["message"]["content"]
#         print("RESPONSE STRING\n", response_str)
#         if type(response_str) != str:
#             pytest.fail("response content is not a string")
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test_completion_replicate_deployments()


######## Test TogetherAI ########
def test_completion_together_ai():
    model_name = "together_ai/togethercomputer/llama-2-70b-chat"
    try:
        response = completion(model=model_name, messages=messages, max_tokens=256, n=1, logger_fn=logger_fn)
        # Add any assertions here to check the response
        print(response)
        cost = completion_cost(completion_response=response)
        print("Cost for completion call together-computer/llama-2-70b: ", f"${float(cost):.10f}")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_together_ai()

def test_customprompt_together_ai():
    try:
        litellm.set_verbose = False
        litellm.num_retries = 0
        response = completion(
            model="together_ai/OpenAssistant/llama2-70b-oasst-sft-v10",
            messages=messages,
            roles={
                "system": {"pre_message": "<|im_start|>system\n", "post_message": "<|im_end|>"},
                "assistant": {"pre_message": "<|im_start|>assistant\n", "post_message": "<|im_end|>"},
                "user": {"pre_message": "<|im_start|>user\n", "post_message": "<|im_end|>"},
            },
        )
        print(response)
    except litellm.exceptions.Timeout as e:
        print("Timeout Error")
        litellm.num_retries = 3  # reset retries
        pass
    except Exception as e:
        print(f"ERROR TYPE {type(e)}")
        pytest.fail(f"Error occurred: {e}")


# test_customprompt_together_ai()

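# An illustrative sketch (not litellm's internal prompt factory) of what the roles
# mapping above expresses: each message gets wrapped in its role's
# pre_message/post_message delimiters before being sent as a raw prompt.
def apply_role_template(messages, roles):
    prompt = ""
    for message in messages:
        role = roles.get(message["role"], {})
        prompt += role.get("pre_message", "") + message["content"] + role.get("post_message", "") + "\n"
    return prompt
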
def test_completion_sagemaker():
    try:
        response = completion(
            model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
            messages=messages,
            temperature=0.2,
            max_tokens=80,
            logger_fn=logger_fn
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_sagemaker()

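######## Test Bedrock ########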
def test_completion_bedrock_titan():
    try:
        response = completion(
            model="bedrock/amazon.titan-tg1-large",
            messages=messages,
            temperature=0.2,
            max_tokens=200,
            top_p=0.8,
            logger_fn=logger_fn
        )
        # Add any assertions here to check the response
        print(response)
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_bedrock_titan()


def test_completion_bedrock_claude():
    print("calling claude")
    try:
        response = completion(
            model="anthropic.claude-instant-v1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            logger_fn=logger_fn
        )
        # Add any assertions here to check the response
        print(response)
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_bedrock_claude()


def test_completion_bedrock_cohere():
    print("calling bedrock cohere")
    try:
        response = completion(
            model="bedrock/cohere.command-text-v14",
            messages=[{"role": "user", "content": "hi"}],
            temperature=0.1,
            max_tokens=10,
            stream=True
        )
        # Add any assertions here to check the response
        print(response)
        for chunk in response:
            print(chunk)
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_bedrock_cohere()

def test_completion_bedrock_claude_completion_auth():
    print("calling bedrock claude completion params auth")
    import os

    aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
    aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
    aws_region_name = os.environ["AWS_REGION_NAME"]

    os.environ["AWS_ACCESS_KEY_ID"] = ""
    os.environ["AWS_SECRET_ACCESS_KEY"] = ""
    os.environ["AWS_REGION_NAME"] = ""

    try:
        response = completion(
            model="bedrock/anthropic.claude-instant-v1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            logger_fn=logger_fn,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_region_name=aws_region_name,
        )
        # Add any assertions here to check the response
        print(response)

        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
        os.environ["AWS_REGION_NAME"] = aws_region_name
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_bedrock_claude_completion_auth()

# def test_completion_bedrock_claude_external_client_auth():
#     print("calling bedrock claude external client auth")
#     import os

#     aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
#     aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
#     aws_region_name = os.environ["AWS_REGION_NAME"]

#     os.environ["AWS_ACCESS_KEY_ID"] = ""
#     os.environ["AWS_SECRET_ACCESS_KEY"] = ""
#     os.environ["AWS_REGION_NAME"] = ""

#     try:
#         import boto3
#         bedrock = boto3.client(
#             service_name="bedrock-runtime",
#             region_name=aws_region_name,
#             aws_access_key_id=aws_access_key_id,
#             aws_secret_access_key=aws_secret_access_key,
#             endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com"
#         )

#         response = completion(
#             model="bedrock/anthropic.claude-instant-v1",
#             messages=messages,
#             max_tokens=10,
#             temperature=0.1,
#             logger_fn=logger_fn,
#             aws_bedrock_client=bedrock,
#         )
#         # Add any assertions here to check the response
#         print(response)

#         os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
#         os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
#         os.environ["AWS_REGION_NAME"] = aws_region_name
#     except RateLimitError:
#         pass
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_claude_external_client_auth()

# def test_completion_bedrock_claude_stream():
#     print("calling claude")
#     litellm.set_verbose = False
#     try:
#         response = completion(
#             model="bedrock/anthropic.claude-instant-v1",
#             messages=messages,
#             stream=True
#         )
#         # Add any assertions here to check the response
#         print(response)
#         for chunk in response:
#             print(chunk)
#     except RateLimitError:
#         pass
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_claude_stream()

# def test_completion_bedrock_ai21():
#     try:
#         litellm.set_verbose = False
#         response = completion(
#             model="bedrock/ai21.j2-mid",
#             messages=messages,
#             temperature=0.2,
#             top_p=0.2,
#             max_tokens=20
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except RateLimitError:
#         pass
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")


######## Test VLLM ########
# def test_completion_vllm():
#     try:
#         response = completion(
#             model="vllm/facebook/opt-125m",
#             messages=messages,
#             temperature=0.2,
#             max_tokens=80,
#         )
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_vllm()

# def test_completion_hosted_chatCompletion():
#     # this tests calling a server where vllm is hosted
#     # this should make an openai.Completion() call to the specified api_base
#     # send a request to this proxy server: https://replit.com/@BerriAI/openai-proxy#main.py
#     # it checks if model == facebook/opt-125m and returns test passed
#     try:
#         litellm.set_verbose = True
#         response = completion(
#             model="facebook/opt-125m",
#             messages=messages,
#             temperature=0.2,
#             max_tokens=80,
#             api_base="https://openai-proxy.berriai.repl.co",
#             custom_llm_provider="openai"
#         )
#         print(response)

#         if response['choices'][0]['message']['content'] != "passed":
#             # see https://replit.com/@BerriAI/openai-proxy#main.py
#             pytest.fail(f"Error occurred: proxy server did not respond")
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_hosted_chatCompletion()

# def test_completion_custom_api_base():
#     try:
#         response = completion(
#             model="custom/meta-llama/Llama-2-13b-hf",
#             messages=messages,
#             temperature=0.2,
#             max_tokens=10,
#             api_base="https://api.autoai.dev/inference",
#             request_timeout=300,
#         )
#         # Add any assertions here to check the response
#         print("got response\n", response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_custom_api_base()

# def test_vertex_ai():
#     test_models = ["codechat-bison"] + litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models
#     # test_models = ["chat-bison"]
#     for model in test_models:
#         try:
#             if model in ["code-gecko@001", "code-gecko@latest"]:
#                 # our account does not have access to this model
#                 continue
#             print("making request", model)
#             response = completion(model=model, messages=[{"role": "user", "content": "hi"}])
#             print(response)

#             print(response.usage.completion_tokens)
#             print(response['usage']['completion_tokens'])
#             assert type(response.choices[0].message.content) == str
#         except Exception as e:
#             pytest.fail(f"Error occurred: {e}")
# test_vertex_ai()

# def test_vertex_ai_stream():
#     litellm.set_verbose = False
#     test_models = litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models
#     for model in test_models:
#         try:
#             if model in ["code-gecko@001", "code-gecko@latest"]:
#                 # our account does not have access to this model
#                 continue
#             print("making request", model)
#             response = completion(model=model, messages=[{"role": "user", "content": "write 100 line code code for saying hi"}], stream=True)
#             for chunk in response:
#                 print(chunk)
#                 # pass
#         except Exception as e:
#             pytest.fail(f"Error occurred: {e}")
# test_vertex_ai_stream()


def test_completion_with_fallbacks():
    print("RUNNING TEST COMPLETION WITH FALLBACKS - test_completion_with_fallbacks")
    fallbacks = ["gpt-3.5-turbo", "gpt-3.5-turbo", "command-nightly"]
    try:
        response = completion(
            model="bad-model", messages=messages, force_timeout=120, fallbacks=fallbacks
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_with_fallbacks()

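# Conceptually, the fallbacks parameter above behaves like this hedged sketch:
# try each model in order and return the first response that succeeds.
def completion_with_manual_fallbacks(models, messages):
    last_error = None
    for model in models:
        try:
            return completion(model=model, messages=messages)
        except Exception as e:
            last_error = e
    raise last_error
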
def test_completion_anyscale_api():
    try:
        # litellm.set_verbose = True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {"role": "user", "content": "Hey"},
            {"role": "user", "content": "Hey"},
        ]
        response = completion(
            model="anyscale/meta-llama/Llama-2-7b-chat-hf",
            messages=messages,
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_anyscale_api()


def test_completion_anyscale_2():
    try:
        # litellm.set_verbose = True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {"role": "user", "content": "Hey"},
            {"role": "user", "content": "Hey"},
        ]
        response = completion(
            model="anyscale/meta-llama/Llama-2-7b-chat-hf",
            messages=messages
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_mistral_anyscale_stream():
    litellm.set_verbose = False
    response = completion(
        model="anyscale/mistralai/Mistral-7B-Instruct-v0.1",
        messages=[{"content": "hello, good morning", "role": "user"}],
        stream=True,
    )
    for chunk in response:
        # print(chunk)
        print(chunk["choices"][0]["delta"].get("content", ""), end="")


# test_mistral_anyscale_stream()
# test_completion_anyscale_2()
# def test_completion_with_fallbacks_multiple_keys():
#     print(f"backup key 1: {os.getenv('BACKUP_OPENAI_API_KEY_1')}")
#     print(f"backup key 2: {os.getenv('BACKUP_OPENAI_API_KEY_2')}")
#     backup_keys = [{"api_key": os.getenv("BACKUP_OPENAI_API_KEY_1")}, {"api_key": os.getenv("BACKUP_OPENAI_API_KEY_2")}]
#     try:
#         api_key = "bad-key"
#         response = completion(
#             model="gpt-3.5-turbo", messages=messages, force_timeout=120, fallbacks=backup_keys, api_key=api_key
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         error_str = traceback.format_exc()
#         pytest.fail(f"Error occurred: {error_str}")

# test_completion_with_fallbacks_multiple_keys()

# def test_petals():
#     try:
#         response = completion(model="petals-team/StableBeluga2", messages=messages)
#         # Add any assertions here to check the response
#         print(response)

#         response = completion(model="petals-team/StableBeluga2", messages=messages)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# def test_baseten():
#     try:
#         response = completion(model="baseten/7qQNLDB", messages=messages, logger_fn=logger_fn)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_baseten()

# def test_baseten_falcon_7bcompletion():
#     model_name = "qvv0xeq"
#     try:
#         response = completion(model=model_name, messages=messages, custom_llm_provider="baseten")
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test_baseten_falcon_7bcompletion()

# def test_baseten_falcon_7bcompletion_withbase():
#     model_name = "qvv0xeq"
#     litellm.api_base = "https://app.baseten.co"
#     try:
#         response = completion(model=model_name, messages=messages)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
#     litellm.api_base = None

# test_baseten_falcon_7bcompletion_withbase()


# def test_baseten_wizardLMcompletion_withbase():
#     model_name = "q841o8w"
#     litellm.api_base = "https://app.baseten.co"
#     try:
#         response = completion(model=model_name, messages=messages)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_baseten_wizardLMcompletion_withbase()

# def test_baseten_mosaic_ML_completion_withbase():
#     model_name = "31dxrj3"
#     litellm.api_base = "https://app.baseten.co"
#     try:
#         response = completion(model=model_name, messages=messages)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")


#### Test AI21 ###################
def test_completion_ai21():
    model_name = "j2-light"
    try:
        response = completion(model=model_name, messages=messages, max_tokens=100, temperature=0.8)
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_ai21()

## test deep infra
def test_completion_deep_infra():
    litellm.set_verbose = False
    model_name = "deepinfra/meta-llama/Llama-2-70b-chat-hf"
    try:
        response = completion(
            model=model_name,
            messages=messages,
            temperature=0,
            max_tokens=10
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


test_completion_deep_infra()

def test_completion_deep_infra_mistral():
    print("deep infra test with temp=0")
    model_name = "deepinfra/mistralai/Mistral-7B-Instruct-v0.1"
    try:
        response = completion(
            model=model_name,
            messages=messages,
            temperature=0.01,  # mistral fails with temperature=0
            max_tokens=10
        )
        # Add any assertions here to check the response
        print(response)
    except litellm.exceptions.Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_deep_infra_mistral()

# Palm tests
def test_completion_palm():
    # litellm.set_verbose = True
    model_name = "palm/chat-bison"
    try:
        response = completion(model=model_name, messages=messages, stop=["stop"])
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_palm()


# test palm with streaming
def test_completion_palm_stream():
    # litellm.set_verbose = True
    model_name = "palm/chat-bison"
    try:
        response = completion(
            model=model_name,
            messages=messages,
            stop=["stop"],
            stream=True,
            max_tokens=20
        )
        # Add any assertions here to check the response
        for chunk in response:
            print(chunk)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_palm_stream()

# test_completion_deep_infra()
# test_completion_ai21()

# test config file with completion #
# def test_completion_openai_config():
#     try:
#         litellm.config_path = "../config.json"
#         litellm.set_verbose = True
#         response = litellm.config_completion(messages=messages)
#         # Add any assertions here to check the response
#         print(response)
#         litellm.config_path = None
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")


# def test_maritalk():
#     messages = [{"role": "user", "content": "Hey"}]
#     try:
#         response = completion("maritalk", messages=messages)
#         print(f"response: {response}")
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test_maritalk()

def test_completion_together_ai_stream():
    user_message = "Write 1pg about YC & litellm"
    messages = [{"content": user_message, "role": "user"}]
    try:
        response = completion(
            model="together_ai/togethercomputer/llama-2-70b-chat",
            messages=messages,
            stream=True,
            max_tokens=5
        )
        print(response)
        for chunk in response:
            print(chunk)
            # print(string_response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_together_ai_stream()


# async def get_response(generator):
#     async for elem in generator:
#         print(elem)
#     return

# test_completion_together_ai_stream()

def test_moderation():
    import openai

    openai.api_type = "azure"
    openai.api_version = "GM"
    response = litellm.moderation(input="i'm ishaan cto of litellm")
    print(response)
    output = response.results[0]
    print(output)
    return output


test_moderation()