import sys, os, io
import traceback
from dotenv import load_dotenv

load_dotenv()

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion, completion_cost, Timeout
from litellm import RateLimitError

litellm.num_retries = 3
litellm.cache = None
user_message = "Write a short poem about the sky"
messages = [{"content": user_message, "role": "user"}]


def logger_fn(user_model_dict):
    print(f"user_model_dict: {user_model_dict}")


def test_completion_custom_provider_model_name():
    try:
        litellm.cache = None
        response = completion(
            model="together_ai/togethercomputer/llama-2-70b-chat",
            messages=messages,
            logger_fn=logger_fn,
        )
        # Add any assertions here to check the response
        print(response)
        print(response["choices"][0]["finish_reason"])
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_custom_provider_model_name()


def test_completion_claude():
    litellm.set_verbose = False
    litellm.cache = None
    litellm.AnthropicConfig(max_tokens_to_sample=200, metadata={"user_id": "1224"})
    try:
        # test without max tokens
        response = completion(
            model="claude-instant-1",
            messages=messages,
            request_timeout=10,
        )
        # Add any assertions here to check the response
        print(response)
        print(response.usage)
        print(response.usage.completion_tokens)
        print(response["usage"]["completion_tokens"])
        # print("new cost tracking")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_claude()

# def test_completion_oobabooga():
#     try:
#         response = completion(
#             model="oobabooga/vicuna-1.3b", messages=messages, api_base="http://127.0.0.1:5000"
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_oobabooga()

# aleph alpha
# def test_completion_aleph_alpha():
#     try:
#         response = completion(
#             model="luminous-base", messages=messages, logger_fn=logger_fn
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_aleph_alpha()

# def test_completion_aleph_alpha_control_models():
#     try:
#         response = completion(
#             model="luminous-base-control", messages=messages, logger_fn=logger_fn
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_aleph_alpha_control_models()

import openai


def test_completion_gpt4_turbo():
    try:
        response = completion(
            model="gpt-4-1106-preview",
            messages=messages,
            max_tokens=10,
        )
        print(response)
    except openai.RateLimitError:
        print("got a rate limit error")
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_gpt4_turbo()
def test_completion_gpt4_vision():
    try:
        litellm.set_verbose = True
        response = completion(
            model="gpt-4-vision-preview",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Whats in this image?"},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
                            },
                        },
                    ],
                }
            ],
        )
        print(response)
    except openai.RateLimitError:
        print("got a rate limit error")
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_gpt4_vision()


def test_completion_perplexity_api():
    try:
        # litellm.set_verbose=True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {"role": "user", "content": "Hey"},
            {"role": "user", "content": "Hey"},
        ]
        response = completion(
            model="mistral-7b-instruct",
            messages=messages,
            api_base="https://api.perplexity.ai",
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_perplexity_api()


def test_completion_perplexity_api_2():
    try:
        # litellm.set_verbose=True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {"role": "user", "content": "Hey"},
            {"role": "user", "content": "Hey"},
        ]
        response = completion(model="perplexity/mistral-7b-instruct", messages=messages)
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_perplexity_api_2()

# commenting out as this is a flaky test on circle ci
# def test_completion_nlp_cloud():
#     try:
#         messages = [
#             {"role": "system", "content": "You are a helpful assistant."},
#             {
#                 "role": "user",
#                 "content": "how does a court case get to the Supreme Court?",
#             },
#         ]
#         response = completion(model="dolphin", messages=messages, logger_fn=logger_fn)
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_nlp_cloud()

######### HUGGING FACE TESTS ########################
#####################################################
"""
HF Tests we should pass
- TGI:
    - Pro Inference API
    - Deployed Endpoint
- Conversational
    - Free Inference API
    - Deployed Endpoint
- Neither TGI or Conversational
    - Free Inference API
    - Deployed Endpoint
"""
#####################################################
#####################################################
# Test util to sort models to TGI, conv, None


def test_get_hf_task_for_model():
    model = "glaiveai/glaive-coder-7b"
    model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model)
    print(f"model:{model}, model type: {model_type}")
    assert model_type == "text-generation-inference"

    model = "meta-llama/Llama-2-7b-hf"
    model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model)
    print(f"model:{model}, model type: {model_type}")
    assert model_type == "text-generation-inference"

    model = "facebook/blenderbot-400M-distill"
    model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model)
    print(f"model:{model}, model type: {model_type}")
    assert model_type == "conversational"

    model = "facebook/blenderbot-3B"
    model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model)
    print(f"model:{model}, model type: {model_type}")
    assert model_type == "conversational"

    # neither Conv or None
    model = "roneneldan/TinyStories-3M"
    model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model)
    print(f"model:{model}, model type: {model_type}")
    assert model_type is None

# test_get_hf_task_for_model()
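
# A minimal, optional sketch of the same routing checks written table-driven with
# pytest.mark.parametrize; the expected task values are copied from the assertions
# above, and the function name below is hypothetical (and, like the hf_ helpers in
# this file, deliberately not collected by pytest).
@pytest.mark.parametrize(
    "model, expected_task",
    [
        ("glaiveai/glaive-coder-7b", "text-generation-inference"),
        ("meta-llama/Llama-2-7b-hf", "text-generation-inference"),
        ("facebook/blenderbot-400M-distill", "conversational"),
        ("facebook/blenderbot-3B", "conversational"),
        ("roneneldan/TinyStories-3M", None),
    ],
)
def hf_task_routing_sketch(model, expected_task):
    assert litellm.llms.huggingface_restapi.get_hf_task_for_model(model) == expected_task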
# litellm.set_verbose=False
# ################### Hugging Face TGI models ########################
# # TGI model
# # this is a TGI model https://huggingface.co/glaiveai/glaive-coder-7b
def hf_test_completion_tgi():
    # litellm.set_verbose=True
    try:
        response = completion(
            model="huggingface/HuggingFaceH4/zephyr-7b-beta",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# hf_test_completion_tgi()


def hf_test_completion_tgi_stream():
    try:
        response = completion(
            model="huggingface/HuggingFaceH4/zephyr-7b-beta",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
            stream=True,
        )
        # Add any assertions here to check the response
        print(response)
        for chunk in response:
            print(chunk["choices"][0]["delta"]["content"])
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# hf_test_completion_tgi_stream()

# ################### Hugging Face Conversational models ########################
# def hf_test_completion_conv():
#     try:
#         response = litellm.completion(
#             model="huggingface/facebook/blenderbot-3B",
#             messages=[{"content": "Hello, how are you?", "role": "user"}],
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# hf_test_completion_conv()

# ################### Hugging Face Neither TGI or Conversational models ########################
# # Neither TGI or Conversational
# def hf_test_completion_none_task():
#     try:
#         user_message = "My name is Merve and my favorite"
#         messages = [{"content": user_message, "role": "user"}]
#         response = completion(
#             model="huggingface/roneneldan/TinyStories-3M",
#             messages=messages,
#             api_base="https://p69xlsj6rpno5drq.us-east-1.aws.endpoints.huggingface.cloud",
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# hf_test_completion_none_task()

########################### End of Hugging Face Tests ##############################################

# def test_completion_hf_api():
#     # failing on circle ci commenting out
#     try:
#         user_message = "write some code to find the sum of two numbers"
#         messages = [{"content": user_message, "role": "user"}]
#         api_base = "https://a8l9e3ucxinyl3oj.us-east-1.aws.endpoints.huggingface.cloud"
#         response = completion(model="huggingface/meta-llama/Llama-2-7b-chat-hf", messages=messages, api_base=api_base)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         if "loading" in str(e):
#             pass
#         pytest.fail(f"Error occurred: {e}")

# test_completion_hf_api()

# def test_completion_hf_api_best_of():
#     # failing on circle ci commenting out
#     try:
#         user_message = "write some code to find the sum of two numbers"
#         messages = [{"content": user_message, "role": "user"}]
#         api_base = "https://a8l9e3ucxinyl3oj.us-east-1.aws.endpoints.huggingface.cloud"
#         response = completion(model="huggingface/meta-llama/Llama-2-7b-chat-hf", messages=messages, api_base=api_base, n=2)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         if "loading" in str(e):
#             pass
#         pytest.fail(f"Error occurred: {e}")

# test_completion_hf_api_best_of()

# def test_completion_hf_deployed_api():
#     try:
#         user_message = "There's a llama in my garden 😱 What should I do?"
#         messages = [{"content": user_message, "role": "user"}]
#         response = completion(model="huggingface/https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", messages=messages, logger_fn=logger_fn)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# this should throw an exception, to trigger https://logs.litellm.ai/
# def hf_test_error_logs():
#     try:
#         litellm.set_verbose=True
#         user_message = "My name is Merve and my favorite"
#         messages = [{"content": user_message, "role": "user"}]
#         response = completion(
#             model="huggingface/roneneldan/TinyStories-3M",
#             messages=messages,
#             api_base="https://p69xlsj6rpno5drq.us-east-1.aws.endpoints.huggingface.cloud",
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# hf_test_error_logs()


def test_completion_cohere():  # commenting for now as the cohere endpoint is being flaky
    try:
        litellm.CohereConfig(max_tokens=1000, stop_sequences=["a"])
        response = completion(
            model="command-nightly", messages=messages, logger_fn=logger_fn
        )
        # Add any assertions here to check the response
        print(response)
        response_str = response["choices"][0]["message"]["content"]
        response_str_2 = response.choices[0].message.content
        if type(response_str) != str:
            pytest.fail("Error occurred: response content is not a string")
        if type(response_str_2) != str:
            pytest.fail("Error occurred: response content is not a string")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_cohere()


def test_completion_openai():
    try:
        litellm.set_verbose = True
        print(f"api key: {os.environ['OPENAI_API_KEY']}")
        litellm.api_key = os.environ["OPENAI_API_KEY"]
        response = completion(
            model="gpt-3.5-turbo", messages=messages, max_tokens=10, request_timeout=10
        )
        print("This is the response object\n", response)

        response_str = response["choices"][0]["message"]["content"]
        response_str_2 = response.choices[0].message.content

        cost = completion_cost(completion_response=response)
        print("Cost for completion call with gpt-3.5-turbo: ", f"${float(cost):.10f}")
        assert response_str == response_str_2
        assert type(response_str) == str
        assert len(response_str) > 1

        litellm.api_key = None
    except Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_openai()


def test_completion_text_openai():
    try:
        # litellm.set_verbose = True
        response = completion(model="gpt-3.5-turbo-instruct", messages=messages)
        print(response["choices"][0]["message"]["content"])
    except Exception as e:
        print(e)
        pytest.fail(f"Error occurred: {e}")

# test_completion_text_openai()


def test_completion_openai_with_optional_params():
    try:
        litellm.set_verbose = True
        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.5,
            top_p=0.1,
        )
        # Add any assertions here to check the response
        print(response)
    except litellm.Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_openai_with_optional_params()
def test_completion_openai_litellm_key():
    try:
        litellm.set_verbose = True
        litellm.num_retries = 0
        litellm.api_key = os.environ["OPENAI_API_KEY"]

        # ensure key is set to None in .env and in openai.api_key
        os.environ["OPENAI_API_KEY"] = ""
        import openai

        openai.api_key = ""
        ##########################################################

        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.5,
            top_p=0.1,
            max_tokens=10,
            user="ishaan_dev@berri.ai",
        )
        # Add any assertions here to check the response
        print(response)

        ###### reset environ key
        os.environ["OPENAI_API_KEY"] = litellm.api_key

        ##### unset litellm var
        litellm.api_key = None
    except Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

test_completion_openai_litellm_key()


def test_completion_openrouter1():
    try:
        response = completion(
            model="openrouter/google/palm-2-chat-bison",
            messages=messages,
            max_tokens=5,
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_openrouter1()


def test_completion_hf_model_no_provider():
    try:
        response = completion(
            model="WizardLM/WizardLM-70B-V1.0",
            messages=messages,
            max_tokens=5,
        )
        # Add any assertions here to check the response
        print(response)
        pytest.fail("Error occurred: expected the call to fail without a provider")
    except Exception as e:
        pass

# test_completion_hf_model_no_provider()

# def test_completion_openai_azure_with_functions():
#     function1 = [
#         {
#             "name": "get_current_weather",
#             "description": "Get the current weather in a given location",
#             "parameters": {
#                 "type": "object",
#                 "properties": {
#                     "location": {
#                         "type": "string",
#                         "description": "The city and state, e.g. San Francisco, CA",
#                     },
#                     "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
#                 },
#                 "required": ["location"],
#             },
#         }
#     ]
#     try:
#         messages = [{"role": "user", "content": "What is the weather like in Boston?"}]
#         response = completion(
#             model="azure/chatgpt-functioncalling", messages=messages, functions=function1
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_openai_azure_with_functions()


def test_completion_azure():
    try:
        print("azure gpt-3.5 test\n\n")
        litellm.set_verbose = False
        ## Test azure call
        response = completion(
            model="azure/chatgpt-v-2",
            messages=messages,
        )
        ## Test azure flag for backwards compatibility
        response = completion(
            model="chatgpt-v-2",
            messages=messages,
            azure=True,
            max_tokens=10,
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_azure_openai_ad_token():
    # this tests if the azure ad token is set in the request header
    # the request can fail since azure ad tokens expire after 30 mins, but the header MUST have the azure ad token
    # we use litellm.input_callbacks for this test
    def tester(
        kwargs,  # kwargs to completion
    ):
        print(kwargs["additional_args"])
        if kwargs["additional_args"]["headers"]["Authorization"] != "Bearer gm":
            pytest.fail("AZURE AD TOKEN Passed but not set in request header")
        return

    litellm.input_callback = [tester]
    try:
        response = litellm.completion(
            model="azure/chatgpt-v-2",  # e.g. gpt-35-instant
            messages=[
                {
                    "role": "user",
                    "content": "what is your name",
                },
            ],
            azure_ad_token="gm",
        )
        print("azure ad token response\n")
        print(response)
        litellm.input_callback = []
    except Exception:
        litellm.input_callback = []
        pass

# test_azure_openai_ad_token()
# test_completion_azure()


def test_completion_azure2():
    # test if we can pass api_base, api_version and api_key in completion()
    try:
        print("azure gpt-3.5 test\n\n")
        litellm.set_verbose = False
        api_base = os.environ["AZURE_API_BASE"]
        api_key = os.environ["AZURE_API_KEY"]
        api_version = os.environ["AZURE_API_VERSION"]

        os.environ["AZURE_API_BASE"] = ""
        os.environ["AZURE_API_VERSION"] = ""
        os.environ["AZURE_API_KEY"] = ""

        ## Test azure call
        response = completion(
            model="azure/chatgpt-v-2",
            messages=messages,
            api_base=api_base,
            api_key=api_key,
            api_version=api_version,
            max_tokens=10,
        )

        # Add any assertions here to check the response
        print(response)

        os.environ["AZURE_API_BASE"] = api_base
        os.environ["AZURE_API_VERSION"] = api_version
        os.environ["AZURE_API_KEY"] = api_key
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_azure2()


def test_completion_azure3():
    # test if we can pass api_base, api_version and api_key in completion()
    try:
        print("azure gpt-3.5 test\n\n")
        litellm.set_verbose = True
        litellm.api_base = os.environ["AZURE_API_BASE"]
        litellm.api_key = os.environ["AZURE_API_KEY"]
        litellm.api_version = os.environ["AZURE_API_VERSION"]

        os.environ["AZURE_API_BASE"] = ""
        os.environ["AZURE_API_VERSION"] = ""
        os.environ["AZURE_API_KEY"] = ""

        ## Test azure call
        response = completion(
            model="azure/chatgpt-v-2",
            messages=messages,
            max_tokens=10,
        )

        # Add any assertions here to check the response
        print(response)

        os.environ["AZURE_API_BASE"] = litellm.api_base
        os.environ["AZURE_API_VERSION"] = litellm.api_version
        os.environ["AZURE_API_KEY"] = litellm.api_key
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_azure3()
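
# A minimal sketch (not used by the tests above) of a context manager capturing the
# save / blank / restore pattern that test_completion_azure2, test_completion_azure3
# and the bedrock auth test below hand-roll for env vars; the helper name and the
# commented usage are hypothetical.
from contextlib import contextmanager


@contextmanager
def blanked_env_vars(*names):
    saved = {name: os.environ.get(name, "") for name in names}
    try:
        for name in names:
            os.environ[name] = ""  # blank the vars so completion() can't read them
        yield saved  # hand the saved values back, e.g. to pass explicitly to completion()
    finally:
        for name, value in saved.items():
            os.environ[name] = value  # restore the saved values (missing vars come back as "")


# Hypothetical usage:
# with blanked_env_vars("AZURE_API_BASE", "AZURE_API_VERSION", "AZURE_API_KEY") as saved:
#     completion(model="azure/chatgpt-v-2", messages=messages, api_key=saved["AZURE_API_KEY"])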
# new azure test for using litellm vars,
# use the following vars in this test and make an azure_api_call
# litellm.api_type = self.azure_api_type
# litellm.api_base = self.azure_api_base
# litellm.api_version = self.azure_api_version
# litellm.api_key = self.api_key
def test_completion_azure_with_litellm_key():
    try:
        print("azure gpt-3.5 test\n\n")
        import openai

        #### set litellm vars
        litellm.api_type = "azure"
        litellm.api_base = os.environ["AZURE_API_BASE"]
        litellm.api_version = os.environ["AZURE_API_VERSION"]
        litellm.api_key = os.environ["AZURE_API_KEY"]

        ######### UNSET ENV VARs for this ################
        os.environ["AZURE_API_BASE"] = ""
        os.environ["AZURE_API_VERSION"] = ""
        os.environ["AZURE_API_KEY"] = ""

        ######### UNSET OpenAI vars for this ##############
        openai.api_type = ""
        openai.api_base = "gm"
        openai.api_version = "333"
        openai.api_key = "ymca"

        response = completion(
            model="azure/chatgpt-v-2",
            messages=messages,
        )
        # Add any assertions here to check the response
        print(response)

        ######### RESET ENV VARs for this ################
        os.environ["AZURE_API_BASE"] = litellm.api_base
        os.environ["AZURE_API_VERSION"] = litellm.api_version
        os.environ["AZURE_API_KEY"] = litellm.api_key

        ######### UNSET litellm vars
        litellm.api_type = None
        litellm.api_base = None
        litellm.api_version = None
        litellm.api_key = None
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_azure()


def test_completion_azure_deployment_id():
    try:
        litellm.set_verbose = True
        response = completion(
            deployment_id="chatgpt-v-2",
            model="gpt-3.5-turbo",
            messages=messages,
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_azure_deployment_id()

# Only works for local endpoint
# def test_completion_anthropic_openai_proxy():
#     try:
#         response = completion(
#             model="custom_openai/claude-2",
#             messages=messages,
#             api_base="http://0.0.0.0:8000",
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_anthropic_openai_proxy()


def test_completion_replicate_vicuna():
    print("TESTING REPLICATE")
    litellm.set_verbose = False
    model_name = "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b"
    try:
        response = completion(
            model=model_name,
            messages=messages,
            temperature=0.5,
            top_k=20,
            repetition_penalty=1,
            min_tokens=1,
            seed=-1,
            max_tokens=20,
        )
        print(response)
        # Add any assertions here to check the response
        response_str = response["choices"][0]["message"]["content"]
        print("RESPONSE STRING\n", response_str)
        if type(response_str) != str:
            pytest.fail("Error occurred: response content is not a string")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_replicate_vicuna()


def test_completion_replicate_llama2_stream():
    print("TESTING REPLICATE streaming")
    litellm.set_verbose = False
    model_name = "replicate/meta/llama-2-7b-chat:13c3cdee13ee059ab779f0291d29054dab00a47dad8261375654de5540165fb0"
    try:
        response = completion(
            model=model_name,
            messages=[
                {
                    "role": "user",
                    "content": "what is yc write 1 paragraph",
                }
            ],
            stream=True,
            max_tokens=20,
            num_retries=3,
        )
        print(response)
        # Add any assertions here to check the response
        for i, chunk in enumerate(response):
            if i == 0:
                assert len(chunk.choices[0].delta["content"]) > 5
            print(chunk)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_replicate_llama2_stream()
# commenting this out since we won't always be testing a custom replicate deployment
# def test_completion_replicate_deployments():
#     print("TESTING REPLICATE")
#     litellm.set_verbose=False
#     model_name = "replicate/deployments/ishaan-jaff/ishaan-mistral"
#     try:
#         response = completion(
#             model=model_name,
#             messages=messages,
#             temperature=0.5,
#             seed=-1,
#         )
#         print(response)
#         # Add any assertions here to check the response
#         response_str = response["choices"][0]["message"]["content"]
#         print("RESPONSE STRING\n", response_str)
#         if type(response_str) != str:
#             pytest.fail(f"Error occurred: {e}")
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_replicate_deployments()


######## Test TogetherAI ########
def test_completion_together_ai():
    model_name = "together_ai/togethercomputer/llama-2-70b-chat"
    try:
        response = completion(
            model=model_name, messages=messages, max_tokens=256, n=1, logger_fn=logger_fn
        )
        # Add any assertions here to check the response
        print(response)
        cost = completion_cost(completion_response=response)
        print(
            "Cost for completion call together-computer/llama-2-70b: ",
            f"${float(cost):.10f}",
        )
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_together_ai()


def test_customprompt_together_ai():
    try:
        litellm.set_verbose = False
        litellm.num_retries = 0
        response = completion(
            model="together_ai/OpenAssistant/llama2-70b-oasst-sft-v10",
            messages=messages,
            roles={
                "system": {
                    "pre_message": "<|im_start|>system\n",
                    "post_message": "<|im_end|>",
                },
                "assistant": {
                    "pre_message": "<|im_start|>assistant\n",
                    "post_message": "<|im_end|>",
                },
                "user": {
                    "pre_message": "<|im_start|>user\n",
                    "post_message": "<|im_end|>",
                },
            },
        )
        print(response)
    except litellm.exceptions.Timeout as e:
        print("Timeout Error")
        litellm.num_retries = 3  # reset retries
        pass
    except Exception as e:
        print(f"ERROR TYPE {type(e)}")
        pytest.fail(f"Error occurred: {e}")

# test_customprompt_together_ai()


def test_completion_sagemaker():
    try:
        response = completion(
            model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
            messages=messages,
            temperature=0.2,
            max_tokens=80,
            logger_fn=logger_fn,
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_sagemaker()


def test_completion_bedrock_titan():
    try:
        response = completion(
            model="bedrock/amazon.titan-tg1-large",
            messages=messages,
            temperature=0.2,
            max_tokens=200,
            top_p=0.8,
            logger_fn=logger_fn,
        )
        # Add any assertions here to check the response
        print(response)
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_bedrock_titan()


def test_completion_bedrock_claude():
    print("calling claude")
    try:
        response = completion(
            model="anthropic.claude-instant-v1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            logger_fn=logger_fn,
        )
        # Add any assertions here to check the response
        print(response)
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_bedrock_claude()


def test_completion_bedrock_cohere():
    print("calling bedrock cohere")
    try:
        response = completion(
            model="bedrock/cohere.command-text-v14",
            messages=[{"role": "user", "content": "hi"}],
            temperature=0.1,
            max_tokens=10,
            stream=True,
        )
        # Add any assertions here to check the response
        print(response)
        for chunk in response:
            print(chunk)
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_bedrock_cohere()
def test_completion_bedrock_claude_completion_auth():
    print("calling bedrock claude completion params auth")
    import os

    aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
    aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
    aws_region_name = os.environ["AWS_REGION_NAME"]

    os.environ["AWS_ACCESS_KEY_ID"] = ""
    os.environ["AWS_SECRET_ACCESS_KEY"] = ""
    os.environ["AWS_REGION_NAME"] = ""

    try:
        response = completion(
            model="bedrock/anthropic.claude-instant-v1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            logger_fn=logger_fn,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_region_name=aws_region_name,
        )
        # Add any assertions here to check the response
        print(response)

        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
        os.environ["AWS_REGION_NAME"] = aws_region_name
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_bedrock_claude_completion_auth()

# def test_completion_bedrock_claude_external_client_auth():
#     print("calling bedrock claude external client auth")
#     import os
#     aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
#     aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
#     aws_region_name = os.environ["AWS_REGION_NAME"]

#     os.environ["AWS_ACCESS_KEY_ID"] = ""
#     os.environ["AWS_SECRET_ACCESS_KEY"] = ""
#     os.environ["AWS_REGION_NAME"] = ""

#     try:
#         import boto3
#         bedrock = boto3.client(
#             service_name="bedrock-runtime",
#             region_name=aws_region_name,
#             aws_access_key_id=aws_access_key_id,
#             aws_secret_access_key=aws_secret_access_key,
#             endpoint_url=f"https://bedrock-runtime.{aws_region_name}.amazonaws.com",
#         )

#         response = completion(
#             model="bedrock/anthropic.claude-instant-v1",
#             messages=messages,
#             max_tokens=10,
#             temperature=0.1,
#             logger_fn=logger_fn,
#             aws_bedrock_client=bedrock,
#         )
#         # Add any assertions here to check the response
#         print(response)

#         os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
#         os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
#         os.environ["AWS_REGION_NAME"] = aws_region_name
#     except RateLimitError:
#         pass
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_bedrock_claude_external_client_auth()

# def test_completion_bedrock_claude_stream():
#     print("calling claude")
#     litellm.set_verbose = False
#     try:
#         response = completion(
#             model="bedrock/anthropic.claude-instant-v1",
#             messages=messages,
#             stream=True,
#         )
#         # Add any assertions here to check the response
#         print(response)
#         for chunk in response:
#             print(chunk)
#     except RateLimitError:
#         pass
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_bedrock_claude_stream()

# def test_completion_bedrock_ai21():
#     try:
#         litellm.set_verbose = False
#         response = completion(
#             model="bedrock/ai21.j2-mid",
#             messages=messages,
#             temperature=0.2,
#             top_p=0.2,
#             max_tokens=20,
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except RateLimitError:
#         pass
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")


######## Test VLLM ########
# def test_completion_vllm():
#     try:
#         response = completion(
#             model="vllm/facebook/opt-125m",
#             messages=messages,
#             temperature=0.2,
#             max_tokens=80,
#         )
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_vllm()
# def test_completion_hosted_chatCompletion():
#     # this tests calling a server where vllm is hosted
#     # this should make an openai.Completion() call to the specified api_base
#     # send a request to this proxy server: https://replit.com/@BerriAI/openai-proxy#main.py
#     # it checks if model == facebook/opt-125m and returns test passed
#     try:
#         litellm.set_verbose = True
#         response = completion(
#             model="facebook/opt-125m",
#             messages=messages,
#             temperature=0.2,
#             max_tokens=80,
#             api_base="https://openai-proxy.berriai.repl.co",
#             custom_llm_provider="openai",
#         )
#         print(response)

#         if response['choices'][0]['message']['content'] != "passed":
#             # see https://replit.com/@BerriAI/openai-proxy#main.py
#             pytest.fail(f"Error occurred: proxy server did not respond")
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_hosted_chatCompletion()

# def test_completion_custom_api_base():
#     try:
#         response = completion(
#             model="custom/meta-llama/Llama-2-13b-hf",
#             messages=messages,
#             temperature=0.2,
#             max_tokens=10,
#             api_base="https://api.autoai.dev/inference",
#             request_timeout=300,
#         )
#         # Add any assertions here to check the response
#         print("got response\n", response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_completion_custom_api_base()

# def test_vertex_ai():
#     test_models = ["codechat-bison"] + litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models
#     # test_models = ["chat-bison"]
#     for model in test_models:
#         try:
#             if model in ["code-gecko@001", "code-gecko@latest"]:
#                 # our account does not have access to this model
#                 continue
#             print("making request", model)
#             response = completion(model=model, messages=[{'role': 'user', 'content': 'hi'}])
#             print(response)

#             print(response.usage.completion_tokens)
#             print(response['usage']['completion_tokens'])
#             assert type(response.choices[0].message.content) == str
#         except Exception as e:
#             pytest.fail(f"Error occurred: {e}")

# test_vertex_ai()

# def test_vertex_ai_stream():
#     litellm.set_verbose = False
#     test_models = litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models
#     for model in test_models:
#         try:
#             if model in ["code-gecko@001", "code-gecko@latest"]:
#                 # our account does not have access to this model
#                 continue
#             print("making request", model)
#             response = completion(model=model, messages=[{"role": "user", "content": "write 100 line code code for saying hi"}], stream=True)
#             for chunk in response:
#                 print(chunk)
#                 # pass
#         except Exception as e:
#             pytest.fail(f"Error occurred: {e}")

# test_vertex_ai_stream()


def test_completion_with_fallbacks():
    print("RUNNING TEST COMPLETION WITH FALLBACKS - test_completion_with_fallbacks")
    fallbacks = ["gpt-3.5-turbo", "gpt-3.5-turbo", "command-nightly"]
    try:
        response = completion(
            model="bad-model", messages=messages, force_timeout=120, fallbacks=fallbacks
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_with_fallbacks()


def test_completion_anyscale_api():
    try:
        # litellm.set_verbose=True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {"role": "user", "content": "Hey"},
            {"role": "user", "content": "Hey"},
        ]
        response = completion(
            model="anyscale/meta-llama/Llama-2-7b-chat-hf",
            messages=messages,
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_anyscale_api()
def test_completion_anyscale_2():
    try:
        # litellm.set_verbose=True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {"role": "user", "content": "Hey"},
            {"role": "user", "content": "Hey"},
        ]
        response = completion(
            model="anyscale/meta-llama/Llama-2-7b-chat-hf", messages=messages
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_mistral_anyscale_stream():
    litellm.set_verbose = False
    response = completion(
        model="anyscale/mistralai/Mistral-7B-Instruct-v0.1",
        messages=[{"content": "hello, good morning", "role": "user"}],
        stream=True,
    )
    for chunk in response:
        # print(chunk)
        print(chunk["choices"][0]["delta"].get("content", ""), end="")

# test_mistral_anyscale_stream()
# test_completion_anyscale_2()

# def test_completion_with_fallbacks_multiple_keys():
#     print(f"backup key 1: {os.getenv('BACKUP_OPENAI_API_KEY_1')}")
#     print(f"backup key 2: {os.getenv('BACKUP_OPENAI_API_KEY_2')}")
#     backup_keys = [{"api_key": os.getenv("BACKUP_OPENAI_API_KEY_1")}, {"api_key": os.getenv("BACKUP_OPENAI_API_KEY_2")}]
#     try:
#         api_key = "bad-key"
#         response = completion(
#             model="gpt-3.5-turbo", messages=messages, force_timeout=120, fallbacks=backup_keys, api_key=api_key
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         error_str = traceback.format_exc()
#         pytest.fail(f"Error occurred: {error_str}")

# test_completion_with_fallbacks_multiple_keys()

# def test_petals():
#     try:
#         response = completion(model="petals-team/StableBeluga2", messages=messages)
#         # Add any assertions here to check the response
#         print(response)

#         response = completion(model="petals-team/StableBeluga2", messages=messages)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# def test_baseten():
#     try:
#         response = completion(model="baseten/7qQNLDB", messages=messages, logger_fn=logger_fn)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_baseten()

# def test_baseten_falcon_7bcompletion():
#     model_name = "qvv0xeq"
#     try:
#         response = completion(model=model_name, messages=messages, custom_llm_provider="baseten")
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_baseten_falcon_7bcompletion()

# def test_baseten_falcon_7bcompletion_withbase():
#     model_name = "qvv0xeq"
#     litellm.api_base = "https://app.baseten.co"
#     try:
#         response = completion(model=model_name, messages=messages)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
#     litellm.api_base = None

# test_baseten_falcon_7bcompletion_withbase()

# def test_baseten_wizardLMcompletion_withbase():
#     model_name = "q841o8w"
#     litellm.api_base = "https://app.baseten.co"
#     try:
#         response = completion(model=model_name, messages=messages)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_baseten_wizardLMcompletion_withbase()

# def test_baseten_mosaic_ML_completion_withbase():
#     model_name = "31dxrj3"
#     litellm.api_base = "https://app.baseten.co"
#     try:
#         response = completion(model=model_name, messages=messages)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
#### Test AI21 ###################
def test_completion_ai21():
    print("running ai21 j2light test")
    litellm.set_verbose = True
    model_name = "j2-light"
    try:
        response = completion(
            model=model_name, messages=messages, max_tokens=100, temperature=0.8
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_ai21()


## test deep infra
def test_completion_deep_infra():
    litellm.set_verbose = False
    model_name = "deepinfra/meta-llama/Llama-2-70b-chat-hf"
    try:
        response = completion(
            model=model_name,
            messages=messages,
            temperature=0,
            max_tokens=10,
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_deep_infra()


def test_completion_deep_infra_mistral():
    print("deep infra test with temp=0")
    model_name = "deepinfra/mistralai/Mistral-7B-Instruct-v0.1"
    try:
        response = completion(
            model=model_name,
            messages=messages,
            temperature=0.01,  # mistral fails with temperature=0
            max_tokens=10,
        )
        # Add any assertions here to check the response
        print(response)
    except litellm.exceptions.Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_deep_infra_mistral()


# Palm tests
def test_completion_palm():
    litellm.set_verbose = True
    model_name = "palm/chat-bison"
    messages = [{"role": "user", "content": "Hey, how's it going?"}]
    try:
        response = completion(model=model_name, messages=messages)
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_palm()


# test palm with streaming
def test_completion_palm_stream():
    # litellm.set_verbose = True
    model_name = "palm/chat-bison"
    try:
        response = completion(
            model=model_name,
            messages=messages,
            stop=["stop"],
            stream=True,
            max_tokens=20,
        )
        # Add any assertions here to check the response
        for chunk in response:
            print(chunk)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_palm_stream()
# test_completion_deep_infra()
# test_completion_ai21()

# test config file with completion #
# def test_completion_openai_config():
#     try:
#         litellm.config_path = "../config.json"
#         litellm.set_verbose = True
#         response = litellm.config_completion(messages=messages)
#         # Add any assertions here to check the response
#         print(response)
#         litellm.config_path = None
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# def test_maritalk():
#     messages = [{"role": "user", "content": "Hey"}]
#     try:
#         response = completion("maritalk", messages=messages)
#         print(f"response: {response}")
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")

# test_maritalk()


def test_completion_together_ai_stream():
    user_message = "Write 1pg about YC & litellm"
    messages = [{"content": user_message, "role": "user"}]
    try:
        response = completion(
            model="together_ai/togethercomputer/llama-2-70b-chat",
            messages=messages,
            stream=True,
            max_tokens=5,
        )
        print(response)
        for chunk in response:
            print(chunk)
        # print(string_response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_together_ai_stream()

# async def get_response(generator):
#     async for elem in generator:
#         print(elem)
#     return

# test_completion_together_ai_stream()


def test_moderation():
    import openai

    openai.api_type = "azure"
    openai.api_version = "GM"
    response = litellm.moderation(input="i'm ishaan cto of litellm")
    print(response)
    output = response.results[0]
    print(output)
    return output

# test_moderation()
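
# A minimal sketch (not called by any test above) of collecting a streamed response
# into one string, using the same OpenAI-style delta shape the streaming tests print
# chunk-by-chunk; the helper name and the commented usage are hypothetical.
def collect_stream_content(response):
    pieces = []
    for chunk in response:
        # delta content can be missing or None on the final chunk, so default to ""
        pieces.append(chunk["choices"][0]["delta"].get("content", "") or "")
    return "".join(pieces)

# Hypothetical usage:
# stream = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
# print(collect_stream_content(stream))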