litellm-mirror/litellm/tests/test_amazing_vertex_completion.py
2023-12-16 19:16:32 +05:30

336 lines
No EOL
14 KiB
Python

import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest, asyncio
import litellm
from litellm import embedding, completion, completion_cost, Timeout, acompletion
from litellm import RateLimitError
import json
import os
import tempfile
# Module-wide litellm settings, applied once when this test module is imported.
# NOTE(review): presumably num_retries is the number of times litellm retries a
# failed call -- confirm against the litellm docs. test_gemini_pro_vision
# overwrites it with 0 and does not restore it.
litellm.num_retries = 3
# No cache object is configured for these tests.
litellm.cache = None
# Default single-turn prompt. The tests visible in this file build their own
# message lists and do not read these two names.
user_message = "Write a short poem about the sky"
messages = [{"content": user_message, "role": "user"}]
def load_vertex_ai_credentials():
    """Write a service-account key file and export it for Google auth.

    Starts from ``vertex_key.json`` located next to this file. When that file
    is missing, empty, not valid JSON, or does not contain a JSON object, an
    empty dict is used instead. ``private_key_id`` and ``private_key`` are
    then overwritten with the values of the ``VERTEX_AI_PRIVATE_KEY_ID`` and
    ``VERTEX_AI_PRIVATE_KEY`` environment variables (empty string when unset;
    literal ``\\n`` sequences in the key are turned into real newlines).

    The result is dumped to a temporary file and the absolute path of that
    file is stored in ``os.environ["GOOGLE_APPLICATION_CREDENTIALS"]``.

    Returns:
        None. The function works through its side effects on ``os.environ``
        and the filesystem.
    """
    print("loading vertex ai credentials")
    filepath = os.path.dirname(os.path.abspath(__file__))
    vertex_key_path = os.path.join(filepath, "vertex_key.json")

    # Read the raw text first; a missing file is treated like an empty one.
    try:
        with open(vertex_key_path, "r") as file:
            print("Read vertexai file path")
            content = file.read()
    except FileNotFoundError:
        content = ""

    # Fall back to an empty dict for blank, malformed, or non-object JSON so
    # the key assignments below can never fail on the template's contents.
    service_account_key_data = {}
    if content.strip():
        try:
            loaded = json.loads(content)
        except json.JSONDecodeError:
            loaded = None
        if isinstance(loaded, dict):
            service_account_key_data = loaded

    # Overlay the secret parts of the key from the environment.
    private_key_id = os.environ.get("VERTEX_AI_PRIVATE_KEY_ID", "")
    private_key = os.environ.get("VERTEX_AI_PRIVATE_KEY", "")
    # The env var stores newlines as the two characters "\" + "n".
    private_key = private_key.replace("\\n", "\n")
    service_account_key_data["private_key_id"] = private_key_id
    service_account_key_data["private_key"] = private_key

    # delete=False: the file has to outlive this call, because only its path
    # is handed on through the environment variable.
    with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file:
        json.dump(service_account_key_data, temp_file, indent=2)

    # Export only after the file has been closed and fully flushed.
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.abspath(temp_file.name)
@pytest.mark.asyncio
async def get_response():
    """Ask gemini-pro to complete a small ``count_nums`` code stub.

    Loads the Vertex AI credentials, then sends a system instruction plus the
    stub as the user turn through ``acompletion``.

    Returns:
        The ``acompletion`` response, or ``None`` when litellm raises
        ``UnprocessableEntityError`` (that error is deliberately ignored).
        Any other exception is reported through ``pytest.fail``.
    """
    load_vertex_ai_credentials()
    prompt = '\ndef count_nums(arr):\n """\n Write a function count_nums which takes an array of integers and returns\n the number of elements which has a sum of digits > 0.\n If a number is negative, then its first signed digit will be negative:\n e.g. -123 has signed digits -1, 2, and 3.\n >>> count_nums([]) == 0\n >>> count_nums([-1, 11, -11]) == 1\n >>> count_nums([1, 1, 2]) == 3\n """\n'
    system_turn = {
        "role": "system",
        "content": "Complete the given code with no more explanation. Remember that there is a 4-space indent before the first line of your generated code.",
    }
    user_turn = {"role": "user", "content": prompt}
    try:
        return await acompletion(
            model="gemini-pro",
            messages=[system_turn, user_turn],
        )
    except litellm.UnprocessableEntityError:
        # Tolerated outcome: nothing to return in this case.
        return None
    except Exception as err:
        pytest.fail(f"An error occurred - {str(err)}")
def test_vertex_ai():
    """Check that a plain completion returns non-trivial text content.

    One model is sampled at random from the Vertex chat / code-chat / text /
    code-text lists, and every entry of ``litellm.vertex_language_models`` is
    appended so gemini-pro is always covered. Models the test account cannot
    access are skipped. Each remaining model receives a "hi" prompt and its
    reply content must be a ``str`` longer than one character; any exception
    fails the test.
    """
    import random

    load_vertex_ai_credentials()
    model_pool = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    litellm.set_verbose = False
    litellm.vertex_project = "hardy-device-386718"

    candidates = random.sample(model_pool, 1)
    candidates.extend(litellm.vertex_language_models)  # always test gemini-pro

    # our account does not have access to these models
    inaccessible = (
        "code-gecko",
        "code-gecko@001",
        "code-gecko@002",
        "code-gecko@latest",
        "code-bison@001",
        "text-bison@001",
    )

    for model in candidates:
        if model in inaccessible:
            continue
        try:
            print("making request", model)
            reply = completion(
                model=model,
                messages=[{'role': 'user', 'content': 'hi'}],
                temperature=0.7,
            )
            print("\nModel Response", reply)
            print(reply)
            text = reply.choices[0].message.content
            assert type(text) == str
            assert len(text) > 1
        except Exception as err:
            pytest.fail(f"Error occurred: {err}")
# test_vertex_ai()
def test_vertex_ai_stream():
    """Check that a streamed completion yields string chunks with real text.

    Uses the same model selection as the non-streaming test: one random model
    from the Vertex chat / code-chat / text / code-text lists plus all of
    ``litellm.vertex_language_models``, minus the models the account cannot
    access. For every chunk the delta content (``None`` is read as ``""``)
    must be a ``str``, and the concatenated text must exceed four characters.
    Any exception fails the test.
    """
    load_vertex_ai_credentials()
    litellm.set_verbose = False
    litellm.vertex_project = "hardy-device-386718"
    import random

    model_pool = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    candidates = random.sample(model_pool, 1)
    candidates.extend(litellm.vertex_language_models)  # always test gemini-pro

    # our account does not have access to these models
    inaccessible = (
        "code-gecko",
        "code-gecko@001",
        "code-gecko@002",
        "code-gecko@latest",
        "code-bison@001",
        "text-bison@001",
    )

    for model in candidates:
        if model in inaccessible:
            continue
        try:
            print("making request", model)
            stream = completion(
                model=model,
                messages=[{"role": "user", "content": "write 10 line code code for saying hi"}],
                stream=True,
            )
            completed_str = ""
            for chunk in stream:
                print(chunk)
                # A chunk may carry no text; treat that as an empty piece.
                piece = chunk.choices[0].delta.content or ""
                print("\n content", piece)
                completed_str += piece
                assert type(piece) == str
            assert len(completed_str) > 4
        except Exception as err:
            pytest.fail(f"Error occurred: {err}")
# test_vertex_ai_stream()
@pytest.mark.asyncio
async def test_async_vertexai_response():
    """Check that ``acompletion`` succeeds (or times out) for Vertex models.

    One randomly sampled Vertex model plus every entry of
    ``litellm.vertex_language_models`` is called with a short greeting and a
    5 second timeout. ``litellm.Timeout`` is tolerated; any other exception
    fails the test. The response is only printed, not inspected.
    """
    import random

    load_vertex_ai_credentials()
    model_pool = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    candidates = random.sample(model_pool, 1)
    candidates.extend(litellm.vertex_language_models)  # always test gemini-pro

    # our account does not have access to these models
    inaccessible = (
        "code-gecko",
        "code-gecko@001",
        "code-gecko@002",
        "code-gecko@latest",
        "code-bison@001",
        "text-bison@001",
    )

    for model in candidates:
        print(f'model being tested in async call: {model}')
        if model in inaccessible:
            continue
        try:
            user_message = "Hello, how are you?"
            messages = [{"content": user_message, "role": "user"}]
            response = await acompletion(
                model=model,
                messages=messages,
                temperature=0.7,
                timeout=5,
            )
            print(f"response: {response}")
        except litellm.Timeout:
            # A slow provider is not treated as a test failure.
            pass
        except Exception as err:
            pytest.fail(f"An exception occurred: {err}")
# asyncio.run(test_async_vertexai_response())
@pytest.mark.asyncio
async def test_async_vertexai_streaming_response():
    """Check that async streaming yields non-empty text for Vertex models.

    One randomly sampled Vertex model plus every entry of
    ``litellm.vertex_language_models`` is streamed through ``acompletion``
    with a short greeting and a 5 second timeout. The text of all chunks is
    concatenated and must be non-empty. ``litellm.Timeout`` is tolerated; any
    other exception fails the test.
    """
    import random

    load_vertex_ai_credentials()
    test_models = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    test_models = random.sample(test_models, 1)
    test_models += litellm.vertex_language_models  # always test gemini-pro
    for model in test_models:
        if model in ["code-gecko", "code-gecko@001", "code-gecko@002", "code-gecko@latest", "code-bison@001", "text-bison@001"]:
            # our account does not have access to this model
            continue
        try:
            user_message = "Hello, how are you?"
            messages = [{"content": user_message, "role": "user"}]
            # Call the model selected by the loop; a hard-coded name here
            # would leave the sampled model untested.
            response = await acompletion(
                model=model,
                messages=messages,
                temperature=0.7,
                timeout=5,
                stream=True,
            )
            print(f"response: {response}")
            complete_response = ""
            async for chunk in response:
                print(f"chunk: {chunk}")
                # A chunk may carry no text (content is None); skip it rather
                # than fail on str + None.
                delta_text = chunk.choices[0].delta.content
                if delta_text is not None:
                    complete_response += delta_text
            print(f"complete_response: {complete_response}")
            assert len(complete_response) > 0
        except litellm.Timeout:
            # A slow provider is not treated as a test failure.
            pass
        except Exception as e:
            print(e)
            pytest.fail(f"An exception occurred: {e}")
# asyncio.run(test_async_vertexai_streaming_response())
def test_gemini_pro_vision():
    """Send a text + image prompt to ``vertex_ai/gemini-pro-vision``.

    The image is referenced by a ``gs://`` URL. The response is only printed;
    the test passes as long as the call does not raise. On failure the
    traceback is printed and the exception is re-raised.

    Note: this sets ``litellm.set_verbose`` to True and
    ``litellm.num_retries`` to 0 at module level and does not restore them.
    """
    try:
        load_vertex_ai_credentials()
        litellm.set_verbose = True
        litellm.num_retries = 0

        question_part = {
            "type": "text",
            "text": "Whats in this image?",
        }
        image_part = {
            "type": "image_url",
            "image_url": {
                "url": "gs://cloud-samples-data/generative-ai/image/boats.jpeg",
            },
        }
        resp = litellm.completion(
            model="vertex_ai/gemini-pro-vision",
            messages=[{"role": "user", "content": [question_part, image_part]}],
        )
        print(resp)
    except Exception as err:
        import traceback

        traceback.print_exc()
        raise err
# test_gemini_pro_vision()
# Extra Gemini Vision tests for completion + stream, async, and async + stream.
# They are disabled (commented out) below; if we run into issues with Gemini,
# we will also add these to our CI/CD pipeline.
# def test_gemini_pro_vision_stream():
# try:
# litellm.set_verbose = False
# litellm.num_retries=0
# print("streaming response from gemini-pro-vision")
# resp = litellm.completion(
# model = "vertex_ai/gemini-pro-vision",
# messages=[
# {
# "role": "user",
# "content": [
# {
# "type": "text",
# "text": "Whats in this image?"
# },
# {
# "type": "image_url",
# "image_url": {
# "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
# }
# }
# ]
# }
# ],
# stream=True
# )
# print(resp)
# for chunk in resp:
# print(chunk)
# except Exception as e:
# import traceback
# traceback.print_exc()
# raise e
# test_gemini_pro_vision_stream()
# def test_gemini_pro_vision_async():
# try:
# litellm.set_verbose = True
# litellm.num_retries=0
# async def test():
# resp = await litellm.acompletion(
# model = "vertex_ai/gemini-pro-vision",
# messages=[
# {
# "role": "user",
# "content": [
# {
# "type": "text",
# "text": "Whats in this image?"
# },
# {
# "type": "image_url",
# "image_url": {
# "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
# }
# }
# ]
# }
# ],
# )
# print("async response gemini pro vision")
# print(resp)
# asyncio.run(test())
# except Exception as e:
# import traceback
# traceback.print_exc()
# raise e
# test_gemini_pro_vision_async()
# def test_gemini_pro_vision_async_stream():
# try:
# litellm.set_verbose = True
# litellm.num_retries=0
# async def test():
# resp = await litellm.acompletion(
# model = "vertex_ai/gemini-pro-vision",
# messages=[
# {
# "role": "user",
# "content": [
# {
# "type": "text",
# "text": "Whats in this image?"
# },
# {
# "type": "image_url",
# "image_url": {
# "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
# }
# }
# ]
# }
# ],
# stream=True
# )
# print("async response gemini pro vision")
# print(resp)
# for chunk in resp:
# print(chunk)
# asyncio.run(test())
# except Exception as e:
# import traceback
# traceback.print_exc()
# raise e
# test_gemini_pro_vision_async_stream()