fixes to testing

Krrish Dholakia 2023-09-16 11:39:49 -07:00
parent 09d63a6e73
commit ce827faa93
7 changed files with 91 additions and 167 deletions

View file

@@ -132,6 +132,7 @@ def completion(
    # model specific optional params
    top_k=40, # used by text-bison only
    task: Optional[str]="text-generation-inference", # used by huggingface inference endpoints
    return_full_text: bool = False, # used by huggingface TGI
    remove_input: bool = True, # used by nlp cloud models - prevents input text from being returned as part of output
    request_timeout=0, # unused var for old version of OpenAI API
    fallbacks=[],
@@ -181,7 +182,8 @@ def completion(
        custom_llm_provider=custom_llm_provider,
        top_k=top_k,
        task=task,
        remove_input=remove_input
        remove_input=remove_input,
        return_full_text=return_full_text
    )
    # For logging - save the values of the litellm-specific params passed in
    litellm_params = get_litellm_params(
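
Note on usage: the new return_full_text flag is accepted as a top-level completion() kwarg and forwarded through get_optional_params, so Hugging Face TGI callers can control whether the prompt is echoed back in the output. A minimal sketch, assuming a TGI-backed model is configured; the model name and prompt below are placeholders, not taken from this commit:

from litellm import completion

# Placeholder example: request only the generated continuation from a
# Hugging Face TGI endpoint by passing the new return_full_text kwarg.
response = completion(
    model="huggingface/bigcode/starcoder",  # placeholder model name
    messages=[{"role": "user", "content": "def add(a, b):"}],
    task="text-generation-inference",
    return_full_text=False,  # new kwarg introduced in this commit
    max_tokens=64,
)
print(response["choices"][0]["message"]["content"])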

View file

@@ -92,25 +92,6 @@ def test_completion_with_litellm_call_id():
        pytest.fail(f"Error occurred: {e}")
def test_completion_claude_stream():
    try:
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": "how does a court case get to the Supreme Court?",
            },
        ]
        response = completion(model="claude-2", messages=messages, stream=True)
        # Add any assertions here to check the response
        for chunk in response:
            print(chunk["choices"][0]["delta"]) # same as openai format
            print(chunk["choices"][0]["finish_reason"])
            print(chunk["choices"][0]["delta"]["content"])
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_completion_claude_stream()
def test_completion_nlp_cloud():
    try:
        messages = [
@@ -125,26 +106,6 @@ def test_completion_nlp_cloud():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_completion_nlp_cloud_streaming():
    try:
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": "how does a court case get to the Supreme Court?",
            },
        ]
        response = completion(model="dolphin", messages=messages, stream=True, logger_fn=logger_fn)
        # Add any assertions here to check the response
        for chunk in response:
            print(chunk["choices"][0]["delta"]["content"]) # same as openai format
            print(chunk["choices"][0]["finish_reason"])
            print(chunk["choices"][0]["delta"]["content"])
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_completion_nlp_cloud_streaming()
# test_completion_nlp_cloud_streaming()
# def test_completion_hf_api():
#     try:
#         user_message = "write some code to find the sum of two numbers"
@@ -327,69 +288,6 @@ def test_completion_openai_with_more_optional_params():
        pytest.fail(f"Error occurred: {e}")
def test_completion_openai_with_stream():
    try:
        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.5,
            top_p=0.1,
            n=2,
            max_tokens=150,
            presence_penalty=0.5,
            stream=True,
            frequency_penalty=-0.5,
            logit_bias={27000: 5},
            user="ishaan_dev@berri.ai",
        )
        # Add any assertions here to check the response
        print(response)
        for chunk in response:
            print(chunk)
            if chunk["choices"][0]["finish_reason"] == "stop" or chunk["choices"][0]["finish_reason"] == "length":
                break
            print(chunk["choices"][0]["finish_reason"])
            print(chunk["choices"][0]["delta"]["content"])
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_completion_openai_with_stream()
def test_completion_openai_with_functions():
    function1 = [
        {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        }
    ]
    try:
        response = completion(
            model="gpt-3.5-turbo", messages=messages, functions=function1, stream=True
        )
        # Add any assertions here to check the response
        print(response)
        for chunk in response:
            print(chunk)
            if chunk["choices"][0]["finish_reason"] == "stop":
                break
            print(chunk["choices"][0]["finish_reason"])
            print(chunk["choices"][0]["delta"]["content"])
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_completion_openai_with_functions()
# def test_completion_openai_azure_with_functions():
#     function1 = [
#         {
@@ -544,20 +442,6 @@ def test_completion_replicate_vicuna():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_completion_replicate_vicuna()
def test_completion_replicate_llama_stream():
    model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
    try:
        response = completion(model=model_name, messages=messages, stream=True)
        # Add any assertions here to check the response
        for chunk in response:
            print(chunk)
            print(chunk["choices"][0]["delta"]["content"])
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_completion_replicate_llama_stream()
# def test_completion_replicate_stability_stream():
#     model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
#     try:
@@ -653,26 +537,7 @@ def test_completion_bedrock_ai21():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_completion_bedrock_ai21_stream():
    try:
        litellm.set_verbose = False
        response = completion(
            model="bedrock/amazon.titan-tg1-large",
            messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
            temperature=1,
            max_tokens=4096,
            stream=True,
        )
        # Add any assertions here to check the response
        print(response)
        for chunk in response:
            print(chunk)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_ai21_stream()
# test_completion_sagemaker()
######## Test VLLM ########
# def test_completion_vllm():
#     try:

View file

@@ -213,7 +213,31 @@ def test_completion_cohere_stream():
        print(f"completion_response: {complete_response}")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_completion_bedrock_ai21_stream():
    try:
        litellm.set_verbose = False
        response = completion(
            model="bedrock/amazon.titan-tg1-large",
            messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
            temperature=1,
            max_tokens=4096,
            stream=True,
        )
        # Add any assertions here to check the response
        print(response)
        complete_response = ""  # initialize the accumulator before the streaming loop
        for idx, chunk in enumerate(response):
            chunk, finished = streaming_format_tests(idx, chunk)
            if finished:
                break
            complete_response += chunk
        if complete_response.strip() == "":
            raise Exception("Empty response received")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_completion_cohere_stream()
# test on openai completion call
@@ -301,34 +325,66 @@ def test_together_ai_completion_call_starcoder():
    except:
        print(f"error occurred: {traceback.format_exc()}")
        pass
# test_together_ai_completion_call_starcoder()
# test on aleph alpha completion call - commented out as it's expensive to run this on circle ci for every build
# def test_aleph_alpha_call():
#     try:
#         start_time = time.time()
#         response = completion(
#             model="luminous-base",
#             messages=messages,
#             logger_fn=logger_fn,
#             stream=True,
#         )
#         complete_response = ""
#         print(f"returned response object: {response}")
#         for chunk in response:
#             chunk_time = time.time()
#             complete_response += (
#                 chunk["choices"][0]["delta"]["content"]
#                 if len(chunk["choices"][0]["delta"].keys()) > 0
#                 else ""
#             )
#             if len(complete_response) > 0:
#                 print(complete_response)
#         if complete_response == "":
#             raise Exception("Empty response received")
#     except:
#         print(f"error occurred: {traceback.format_exc()}")
#         pass
#### Test Async streaming
def test_completion_nlp_cloud_streaming():
    try:
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": "how does a court case get to the Supreme Court?",
            },
        ]
        response = completion(model="dolphin", messages=messages, stream=True, logger_fn=logger_fn)
        # Add any assertions here to check the response
        complete_response = ""  # initialize the accumulator before the streaming loop
        for idx, chunk in enumerate(response):
            chunk, finished = streaming_format_tests(idx, chunk)
            if finished:
                break
            complete_response += chunk
        if complete_response == "":
            raise Exception("Empty response received")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
#### Test Function calling + streaming ####
def test_completion_openai_with_functions():
    function1 = [
        {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        }
    ]
    try:
        response = completion(
            model="gpt-3.5-turbo", messages=messages, functions=function1, stream=True
        )
        # Add any assertions here to check the response
        print(response)
        for chunk in response:
            print(chunk)
            if chunk["choices"][0]["finish_reason"] == "stop":
                break
            print(chunk["choices"][0]["finish_reason"])
            print(chunk["choices"][0]["delta"]["content"])
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
test_completion_openai_with_functions()
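
For the function-calling + streaming path exercised above, OpenAI-format chunks carry the call under delta["function_call"] rather than delta["content"], with the argument JSON arriving in pieces. The accumulation pattern below is a sketch written against plain OpenAI-format dict chunks; whether litellm's wrapped chunks expose the same keys for this path is an assumption here, and the prompt and behaviour are illustrative only:

import json
from litellm import completion

functions = [{
    "name": "get_current_weather",
    "description": "Get the current weather in a given location",
    "parameters": {
        "type": "object",
        "properties": {"location": {"type": "string"}},
        "required": ["location"],
    },
}]

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What is the weather in San Francisco?"}],  # placeholder prompt
    functions=functions,
    stream=True,
)
fn_name, fn_args = None, ""
for chunk in response:
    choice = chunk["choices"][0]
    call = choice["delta"].get("function_call")  # assumes dict-style access on the delta
    if call:
        fn_name = call.get("name") or fn_name    # the function name arrives on the first chunk
        fn_args += call.get("arguments") or ""   # argument JSON streams incrementally
    if choice["finish_reason"] is not None:
        break
if fn_name:
    print(fn_name, json.loads(fn_args or "{}"))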
#### Test Async streaming ####
# # test on ai21 completion call
async def ai21_async_completion_call():

View file

@@ -828,6 +828,7 @@ def get_optional_params( # use the openai defaults
    model=None,
    custom_llm_provider="",
    top_k=40,
    return_full_text=False,
    task=None
):
    optional_params = {}
@@ -885,6 +886,7 @@ def get_optional_params( # use the openai defaults
            optional_params["max_new_tokens"] = max_tokens
        if presence_penalty != 0:
            optional_params["repetition_penalty"] = presence_penalty
        optional_params["return_full_text"] = return_full_text
        optional_params["details"] = True
        optional_params["task"] = task
    elif custom_llm_provider == "together_ai" or ("togethercomputer" in model):
@@ -2507,7 +2509,6 @@ class CustomStreamWrapper:
        model_response = ModelResponse(stream=True, model=self.model)
        try:
            # return this for all models
            print_verbose(f"self.sent_first_chunk: {self.sent_first_chunk}")
            if self.sent_first_chunk == False:
                model_response.choices[0].delta.role = "assistant"
                self.sent_first_chunk = True

View file

@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "0.1.677"
version = "0.1.678"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT License"