refactor: move all testing to top-level of repo
Closes https://github.com/BerriAI/litellm/issues/486
parent 5403c5828c
commit 3560f0ef2c
213 changed files with 74 additions and 217 deletions
351 tests/local_testing/test_async_fn.py Normal file
@@ -0,0 +1,351 @@
#### What this tests ####
# This tests the acompletion function #

import asyncio
import logging
import os
import sys
import traceback

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
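# NOTE: with the suite moved to tests/local_testing/, "../.." resolves to the
# repository root, which is what lets `import litellm` below work from a checkout.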
import litellm
from litellm import acompletion, acreate, completion

litellm.num_retries = 3


@pytest.mark.skip(reason="anyscale stopped serving public api endpoints")
|
||||
def test_sync_response_anyscale():
|
||||
litellm.set_verbose = False
|
||||
user_message = "Hello, how are you?"
|
||||
messages = [{"content": user_message, "role": "user"}]
|
||||
try:
|
||||
response = completion(
|
||||
model="anyscale/mistralai/Mistral-7B-Instruct-v0.1",
|
||||
messages=messages,
|
||||
timeout=5,
|
||||
)
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred: {e}")
|
||||
|
||||
|
||||
# test_sync_response_anyscale()
|
||||
|
||||
|
||||
def test_async_response_openai():
    import asyncio

    litellm.set_verbose = True

    async def test_get_response():
        user_message = "Hello, how are you?"
        messages = [{"content": user_message, "role": "user"}]
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "get_current_weather",
                    "description": "Get the current weather in a given location",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "location": {
                                "type": "string",
                                "description": "The city and state, e.g. San Francisco, CA",
                            },
                            "unit": {
                                "type": "string",
                                "enum": ["celsius", "fahrenheit"],
                            },
                        },
                        "required": ["location"],
                    },
                },
            }
        ]
        try:
            response = await acompletion(
                model="gpt-3.5-turbo",
                messages=messages,
                tools=tools,
                parallel_tool_calls=True,
                timeout=5,
            )
            print(f"response: {response}")
            print(f"response ms: {response._response_ms}")
        except litellm.Timeout as e:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")

    asyncio.run(test_get_response())


# test_async_response_openai()


def test_async_response_azure():
    import asyncio

    litellm.set_verbose = True

    async def test_get_response():
        user_message = "What do you know?"
        messages = [{"content": user_message, "role": "user"}]
        try:
            response = await acompletion(
                model="azure/gpt-turbo",
                messages=messages,
                base_url=os.getenv("CLOUDFLARE_AZURE_BASE_URL"),
                api_key=os.getenv("AZURE_FRANCE_API_KEY"),
            )
            print(f"response: {response}")
        except litellm.Timeout as e:
            pass
        except litellm.InternalServerError:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")

    asyncio.run(test_get_response())


# test_async_response_azure()


@pytest.mark.skip(reason="anyscale stopped serving public api endpoints")
|
||||
def test_async_anyscale_response():
|
||||
import asyncio
|
||||
|
||||
litellm.set_verbose = True
|
||||
|
||||
async def test_get_response():
|
||||
user_message = "Hello, how are you?"
|
||||
messages = [{"content": user_message, "role": "user"}]
|
||||
try:
|
||||
response = await acompletion(
|
||||
model="anyscale/mistralai/Mistral-7B-Instruct-v0.1",
|
||||
messages=messages,
|
||||
timeout=5,
|
||||
)
|
||||
# response = await response
|
||||
print(f"response: {response}")
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred: {e}")
|
||||
|
||||
asyncio.run(test_get_response())
|
||||
|
||||
|
||||
# test_async_anyscale_response()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Flaky test-cloudflare is very unstable")
|
||||
def test_async_completion_cloudflare():
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
|
||||
async def test():
|
||||
response = await litellm.acompletion(
|
||||
model="cloudflare/@cf/meta/llama-2-7b-chat-int8",
|
||||
messages=[{"content": "what llm are you", "role": "user"}],
|
||||
max_tokens=5,
|
||||
num_retries=3,
|
||||
)
|
||||
print(response)
|
||||
return response
|
||||
|
||||
response = asyncio.run(test())
|
||||
text_response = response["choices"][0]["message"]["content"]
|
||||
assert len(text_response) > 1 # more than 1 chars in response
|
||||
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# test_async_completion_cloudflare()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Flaky test")
|
||||
def test_get_cloudflare_response_streaming():
|
||||
import asyncio
|
||||
|
||||
async def test_async_call():
|
||||
user_message = "write a short poem in one sentence"
|
||||
messages = [{"content": user_message, "role": "user"}]
|
||||
try:
|
||||
litellm.set_verbose = False
|
||||
response = await acompletion(
|
||||
model="cloudflare/@cf/meta/llama-2-7b-chat-int8",
|
||||
messages=messages,
|
||||
stream=True,
|
||||
num_retries=3, # cloudflare ai workers is EXTREMELY UNSTABLE
|
||||
)
|
||||
print(type(response))
|
||||
|
||||
import inspect
|
||||
|
||||
is_async_generator = inspect.isasyncgen(response)
|
||||
print(is_async_generator)
|
||||
|
||||
output = ""
|
||||
i = 0
|
||||
async for chunk in response:
|
||||
print(chunk)
|
||||
token = chunk["choices"][0]["delta"].get("content", "")
|
||||
if token == None:
|
||||
continue # openai v1.0.0 returns content=None
|
||||
output += token
|
||||
assert output is not None, "output cannot be None."
|
||||
assert isinstance(output, str), "output needs to be of type str"
|
||||
assert len(output) > 0, "Length of output needs to be greater than 0."
|
||||
print(f"output: {output}")
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred: {e}")
|
||||
|
||||
asyncio.run(test_async_call())
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_hf_completion_tgi():
    # litellm.set_verbose=True
    try:
        response = await acompletion(
            model="huggingface/HuggingFaceH4/zephyr-7b-beta",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
        )
        # Add any assertions here to check the response
        print(response)
    except litellm.APIError as e:
        print("got an api error")
        pass
    except litellm.Timeout as e:
        print("got a timeout error")
        pass
    except litellm.RateLimitError as e:
        # this will catch the model is overloaded error
        print("got a rate limit error")
        pass
    except Exception as e:
        if "Model is overloaded" in str(e):
            pass
        else:
            pytest.fail(f"Error occurred: {e}")


# test_get_cloudflare_response_streaming()


@pytest.mark.skip(reason="AWS Suspended Account")
|
||||
@pytest.mark.asyncio
|
||||
async def test_completion_sagemaker():
|
||||
# litellm.set_verbose=True
|
||||
try:
|
||||
response = await acompletion(
|
||||
model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
|
||||
messages=[{"content": "Hello, how are you?", "role": "user"}],
|
||||
)
|
||||
# Add any assertions here to check the response
|
||||
print(response)
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
def test_get_response_streaming():
    import asyncio

    async def test_async_call():
        user_message = "write a short poem in one sentence"
        messages = [{"content": user_message, "role": "user"}]
        try:
            litellm.set_verbose = True
            response = await acompletion(
                model="gpt-3.5-turbo", messages=messages, stream=True, timeout=5
            )
            print(type(response))

            import inspect

            is_async_generator = inspect.isasyncgen(response)
            print(is_async_generator)

            output = ""
            async for chunk in response:
                token = chunk["choices"][0]["delta"].get("content", "")
                if token is None:
                    continue  # openai v1.0.0 returns content=None
                output += token
            assert output is not None, "output cannot be None."
            assert isinstance(output, str), "output needs to be of type str"
            assert len(output) > 0, "Length of output needs to be greater than 0."
            print(f"output: {output}")
        except litellm.Timeout as e:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")

    asyncio.run(test_async_call())


# test_get_response_streaming()


@pytest.mark.skip(reason="anyscale stopped serving public api endpoints")
|
||||
def test_get_response_non_openai_streaming():
|
||||
import asyncio
|
||||
|
||||
litellm.set_verbose = True
|
||||
litellm.num_retries = 0
|
||||
|
||||
async def test_async_call():
|
||||
user_message = "Hello, how are you?"
|
||||
messages = [{"content": user_message, "role": "user"}]
|
||||
try:
|
||||
response = await acompletion(
|
||||
model="anyscale/mistralai/Mistral-7B-Instruct-v0.1",
|
||||
messages=messages,
|
||||
stream=True,
|
||||
timeout=5,
|
||||
)
|
||||
print(type(response))
|
||||
|
||||
import inspect
|
||||
|
||||
is_async_generator = inspect.isasyncgen(response)
|
||||
print(is_async_generator)
|
||||
|
||||
output = ""
|
||||
i = 0
|
||||
async for chunk in response:
|
||||
token = chunk["choices"][0]["delta"].get("content", None)
|
||||
if token == None:
|
||||
continue
|
||||
print(token)
|
||||
output += token
|
||||
print(f"output: {output}")
|
||||
assert output is not None, "output cannot be None."
|
||||
assert isinstance(output, str), "output needs to be of type str"
|
||||
assert len(output) > 0, "Length of output needs to be greater than 0."
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred: {e}")
|
||||
return response
|
||||
|
||||
asyncio.run(test_async_call())
|
||||
|
||||
|
||||
# test_get_response_non_openai_streaming()
|