import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion, text_completion, completion_cost

litellm.vertex_project = "pathrise-convert-1606954137718"
litellm.vertex_location = "us-central1"
litellm.use_client = True
# from infisical import InfisicalClient

# litellm.set_verbose = True
# litellm.secret_manager_client = InfisicalClient(token=os.environ["INFISICAL_TOKEN"])

user_message = "Write a short poem about the sky"
messages = [{"content": user_message, "role": "user"}]


def logger_fn(user_model_dict):
    print(f"user_model_dict: {user_model_dict}")


def test_completion_custom_provider_model_name():
    try:
        response = completion(
            model="together_ai/togethercomputer/llama-2-70b-chat",
            messages=messages,
            logger_fn=logger_fn,
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_custom_provider_model_name()


def test_completion_claude():
    try:
        response = completion(
            model="claude-instant-1", messages=messages, logger_fn=logger_fn
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# aleph alpha
# def test_completion_aleph_alpha():
#     try:
#         response = completion(
#             model="luminous-base", messages=messages, logger_fn=logger_fn
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test_completion_aleph_alpha()


# def test_completion_aleph_alpha_control_models():
#     try:
#         response = completion(
#             model="luminous-base-control", messages=messages, logger_fn=logger_fn
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test_completion_aleph_alpha_control_models()


def test_completion_with_litellm_call_id():
    try:
        litellm.use_client = False
        response = completion(model="gpt-3.5-turbo", messages=messages)
        print(response)
        if "litellm_call_id" in response:
            pytest.fail("Error occurred: litellm_call_id in response object")
        litellm.use_client = True
        response2 = completion(model="gpt-3.5-turbo", messages=messages)
        if "litellm_call_id" not in response2:
            pytest.fail(
                "Error occurred: litellm_call_id not in response object when use_client = True"
            )
        # Add any assertions here to check the response
        print(response2)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_claude_stream():
    try:
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": "how does a court case get to the Supreme Court?",
            },
        ]
        response = completion(model="claude-2", messages=messages, stream=True)
        # Add any assertions here to check the response
        for chunk in response:
            print(chunk["choices"][0]["delta"])  # same as openai format
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# def test_completion_hf_api():
#     try:
#         user_message = "write some code to find the sum of two numbers"
#         messages = [{"content": user_message, "role": "user"}]
#         response = completion(
#             model="stabilityai/stablecode-completion-alpha-3b-4k",
#             messages=messages,
#             custom_llm_provider="huggingface",
#             logger_fn=logger_fn,
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         if "loading" in str(e):
#             pass
#         pytest.fail(f"Error occurred: {e}")
# test_completion_hf_api()
# def test_completion_hf_deployed_api():
#     try:
#         user_message = "There's a llama in my garden 😱 What should I do?"
#         messages = [{"content": user_message, "role": "user"}]
#         response = completion(
#             model="huggingface/https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud",
#             messages=messages,
#             logger_fn=logger_fn,
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")


# def test_completion_cohere():  # commenting for now as the cohere endpoint is being flaky
#     try:
#         response = completion(
#             model="command-nightly",
#             messages=messages,
#             max_tokens=100,
#             logit_bias={40: 10},
#         )
#         # Add any assertions here to check the response
#         print(response)
#         response_str = response["choices"][0]["message"]["content"]
#         print(f"str response{response_str}")
#         response_str_2 = response.choices[0].message.content
#         if type(response_str) != str:
#             pytest.fail("response_str is not a string")
#         if type(response_str_2) != str:
#             pytest.fail("response_str_2 is not a string")
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")


def test_completion_cohere_stream():
    try:
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": "how does a court case get to the Supreme Court?",
            },
        ]
        response = completion(
            model="command-nightly", messages=messages, stream=True, max_tokens=50
        )
        # Add any assertions here to check the response
        for chunk in response:
            print(chunk["choices"][0]["delta"])  # same as openai format
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_openai():
    try:
        litellm.api_key = os.environ["OPENAI_API_KEY"]
        response = completion(model="gpt-3.5-turbo", messages=messages)
        response_str = response["choices"][0]["message"]["content"]
        response_str_2 = response.choices[0].message.content
        print("response\n", response)
        cost = completion_cost(completion_response=response)
        print("Cost for completion call with gpt-3.5-turbo: ", f"${float(cost):.10f}")
        assert response_str == response_str_2
        assert type(response_str) == str
        assert len(response_str) > 1
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_openai()


def test_completion_openai_prompt():
    try:
        response = text_completion(
            model="gpt-3.5-turbo", prompt="What's the weather in SF?"
        )
        response_str = response["choices"][0]["message"]["content"]
        response_str_2 = response.choices[0].message.content
        print(response)
        assert response_str == response_str_2
        assert type(response_str) == str
        assert len(response_str) > 1
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_text_openai():
    try:
        response = completion(model="text-davinci-003", messages=messages)
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_openai_with_optional_params():
    try:
        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.5,
            top_p=0.1,
            user="ishaan_dev@berri.ai",
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# commented out for now, as openrouter is quite flaky - causing our deployments to fail. Please run this before pushing changes.
# def test_completion_openrouter():
#     try:
#         response = completion(
#             model="google/palm-2-chat-bison",
#             messages=messages,
#             temperature=0.5,
#             top_p=0.1,
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")


def test_completion_openai_with_more_optional_params():
    try:
        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.5,
            top_p=0.1,
            n=2,
            max_tokens=150,
            presence_penalty=0.5,
            frequency_penalty=-0.5,
            logit_bias={123: 5},
            user="ishaan_dev@berri.ai",
        )
        # Add any assertions here to check the response
        print(response)
        response_str = response["choices"][0]["message"]["content"]
        response_str_2 = response.choices[0].message.content
        print(response["choices"][0]["message"]["content"])
        print(response.choices[0].message.content)
        if type(response_str) != str:
            pytest.fail("response_str is not a string")
        if type(response_str_2) != str:
            pytest.fail("response_str_2 is not a string")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_openai_with_stream():
    try:
        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.5,
            top_p=0.1,
            n=2,
            max_tokens=150,
            presence_penalty=0.5,
            stream=True,
            frequency_penalty=-0.5,
            logit_bias={27000: 5},
            user="ishaan_dev@berri.ai",
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_openai_with_functions():
    function1 = [
        {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        }
    ]
    try:
        response = completion(
            model="gpt-3.5-turbo", messages=messages, functions=function1
        )
        # Add any assertions here to check the response
        # (see check_function_call, sketched further below, for one way to inspect a function_call)
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_azure():
    try:
        print("azure gpt-3.5 test\n\n")
        response = completion(
            model="azure/chatgpt-v-2",
            messages=messages,
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_azure_deployment_id():
    try:
        response = completion(
            deployment_id="chatgpt-v-2",
            model="gpt-3.5-turbo",
            messages=messages,
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
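# Illustrative helper, not part of the original suite: one way to inspect a function
# call returned by test_completion_openai_with_functions above. Assumes an OpenAI-style
# response dict where the message may carry a `function_call` whose "arguments" field is
# a JSON string; the field names are the standard OpenAI chat format, not a litellm-specific API.
def check_function_call(response):
    import json

    message = response["choices"][0]["message"]
    if "function_call" in message and message["function_call"] is not None:
        name = message["function_call"]["name"]
        args = json.loads(message["function_call"]["arguments"])  # arguments arrive as a JSON string
        print(f"model requested {name} with {args}")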
# Replicate API endpoints are unstable -> throw random CUDA errors -> this means our tests can fail even if our tests weren't incorrect.
def test_completion_replicate_llama_2():
    model_name = "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf"
    try:
        response = completion(
            model=model_name,
            messages=messages,
            max_tokens=20,
            custom_llm_provider="replicate",
        )
        print(response)
        cost = completion_cost(completion_response=response)
        print("Cost for completion call with llama-2: ", f"${float(cost):.10f}")
        # Add any assertions here to check the response
        response_str = response["choices"][0]["message"]["content"]
        print(response_str)
        if type(response_str) != str:
            pytest.fail("response_str is not a string")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_replicate_llama_2()


def test_completion_replicate_vicuna():
    model_name = "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b"
    try:
        response = completion(
            model=model_name,
            messages=messages,
            custom_llm_provider="replicate",
            temperature=0.1,
            max_tokens=20,
        )
        print(response)
        # Add any assertions here to check the response
        response_str = response["choices"][0]["message"]["content"]
        print(response_str)
        if type(response_str) != str:
            pytest.fail("response_str is not a string")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_replicate_vicuna()


def test_completion_replicate_llama_stream():
    model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
    try:
        response = completion(model=model_name, messages=messages, stream=True)
        # Add any assertions here to check the response
        for result in response:
            print(result)
            # chunk_text = result['choices'][0]['delta']['content']
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_replicate_llama_stream()


# def test_completion_replicate_stability_stream():
#     model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
#     try:
#         response = completion(
#             model=model_name,
#             messages=messages,
#             # stream=True,
#             custom_llm_provider="replicate",
#         )
#         # print(response)
#         # Add any assertions here to check the response
#         # for chunk in response:
#         #     print(chunk["choices"][0]["delta"])
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test_completion_replicate_stability_stream()


######## Test TogetherAI ########
def test_completion_together_ai():
    model_name = "togethercomputer/llama-2-70b-chat"
    try:
        response = completion(
            model=model_name, messages=messages, max_tokens=256, logger_fn=logger_fn
        )
        # Add any assertions here to check the response
        print(response)
        cost = completion_cost(completion_response=response)
        print(
            "Cost for completion call together-computer/llama-2-70b: ",
            f"${float(cost):.10f}",
        )
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_together_ai()


# def test_customprompt_together_ai():
#     try:
#         litellm.register_prompt_template(
#             model="OpenAssistant/llama2-70b-oasst-sft-v10",
#             roles={"system": "<|im_start|>system", "assistant": "<|im_start|>assistant", "user": "<|im_start|>user"},  # tell LiteLLM how you want to map the openai messages to this model
#             pre_message_sep="\n",
#             post_message_sep="\n",
#         )
#         response = completion(model="together_ai/OpenAssistant/llama2-70b-oasst-sft-v10", messages=messages)
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")


def test_completion_sagemaker():
    try:
        response = completion(
            model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
            messages=messages,
            temperature=0.2,
            max_tokens=80,
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
######## Test VLLM ########
# def test_completion_vllm():
#     try:
#         response = completion(
#             model="vllm/facebook/opt-125m",
#             messages=messages,
#             temperature=0.2,
#             max_tokens=80,
#         )
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test_completion_vllm()


# def test_completion_custom_api_base():
#     try:
#         response = completion(
#             model="custom/meta-llama/Llama-2-13b-hf",
#             messages=messages,
#             temperature=0.2,
#             max_tokens=10,
#             api_base="https://api.autoai.dev/inference",
#             request_timeout=300,
#         )
#         # Add any assertions here to check the response
#         print("got response\n", response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test_completion_custom_api_base()


# def test_vertex_ai():
#     model_name = "chat-bison"
#     try:
#         response = completion(model=model_name, messages=messages, logger_fn=logger_fn)
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")


# def test_petals():
#     model_name = "stabilityai/StableBeluga2"
#     try:
#         response = completion(
#             model=model_name,
#             messages=messages,
#             custom_llm_provider="petals",
#             force_timeout=120,
#         )
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")


def test_completion_with_fallbacks():
    # "gpt-3.5-turb" is not a real model name, so the fallback logic should move on to the next entry
    fallbacks = ["gpt-3.5-turb", "gpt-3.5-turbo", "command-nightly"]
    try:
        response = completion(
            model="bad-model", messages=messages, force_timeout=120, fallbacks=fallbacks
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# def test_baseten():
#     try:
#         response = completion(model="baseten/RqgAEn0", messages=messages, logger_fn=logger_fn)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")


# def test_baseten_falcon_7bcompletion():
#     model_name = "qvv0xeq"
#     try:
#         response = completion(model=model_name, messages=messages, custom_llm_provider="baseten")
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test_baseten_falcon_7bcompletion()


# def test_baseten_falcon_7bcompletion_withbase():
#     model_name = "qvv0xeq"
#     litellm.api_base = "https://app.baseten.co"
#     try:
#         response = completion(model=model_name, messages=messages)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
#     litellm.api_base = None
# test_baseten_falcon_7bcompletion_withbase()


# def test_baseten_wizardLMcompletion_withbase():
#     model_name = "q841o8w"
#     litellm.api_base = "https://app.baseten.co"
#     try:
#         response = completion(model=model_name, messages=messages)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test_baseten_wizardLMcompletion_withbase()


# def test_baseten_mosaic_ML_completion_withbase():
#     model_name = "31dxrj3"
#     litellm.api_base = "https://app.baseten.co"
#     try:
#         response = completion(model=model_name, messages=messages)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")


#### Test AI21 ###################
# def test_completion_ai21():
#     model_name = "j2-light"
#     try:
#         response = completion(model=model_name, messages=messages)
#         # Add any assertions here to check the response
#         print(response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")
# test config file with completion #
# def test_completion_openai_config():
#     try:
#         litellm.config_path = "../config.json"
#         litellm.set_verbose = True
#         response = litellm.config_completion(messages=messages)
#         # Add any assertions here to check the response
#         print(response)
#         litellm.config_path = None
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")


# import asyncio
# def test_completion_together_ai_stream():
#     user_message = "Write 1pg about YC & litellm"
#     messages = [{"content": user_message, "role": "user"}]
#     try:
#         response = completion(model="togethercomputer/llama-2-70b-chat", messages=messages, stream=True, max_tokens=800)
#         print(response)
#         asyncio.run(get_response(response))
#         # print(string_response)
#     except Exception as e:
#         pytest.fail(f"Error occurred: {e}")


# async def get_response(generator):
#     async for elem in generator:
#         print(elem)
#     return


# test_completion_together_ai_stream()
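# Illustrative helper, not part of the original suite: the streaming tests above only
# print each chunk. A minimal sketch of rebuilding the full reply from a stream, assuming
# OpenAI-style chunks where `delta` is dict-like and may omit `content` on the final chunk.
def collect_stream_content(response):
    full_text = ""
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        full_text += delta.get("content", "") or ""  # tolerate empty or missing content
    return full_text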