fixes to testing

Krrish Dholakia 2023-09-16 11:39:49 -07:00
parent 09d63a6e73
commit ce827faa93
7 changed files with 91 additions and 167 deletions

View file

@@ -132,6 +132,7 @@ def completion(
     # model specific optional params
     top_k=40,# used by text-bison only
     task: Optional[str]="text-generation-inference", # used by huggingface inference endpoints
+    return_full_text: bool = False, # used by huggingface TGI
     remove_input: bool = True, # used by nlp cloud models - prevents input text from being returned as part of output
     request_timeout=0, # unused var for old version of OpenAI API
     fallbacks=[],
@@ -181,7 +182,8 @@ def completion(
         custom_llm_provider=custom_llm_provider,
         top_k=top_k,
         task=task,
-        remove_input=remove_input
+        remove_input=remove_input,
+        return_full_text=return_full_text
     )
     # For logging - save the values of the litellm-specific params passed in
     litellm_params = get_litellm_params(
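
A minimal sketch of how a caller would use the new flag, assuming a Hugging Face TGI deployment is configured (the model name and endpoint below are placeholders, not taken from this commit). With return_full_text=False, the TGI backend should return only the generated continuation instead of echoing the prompt back in the completion text:

import litellm

response = litellm.completion(
    model="huggingface/bigcode/starcoder",            # placeholder model
    messages=[{"role": "user", "content": "def fib(n):"}],
    task="text-generation-inference",
    return_full_text=False,                           # new flag added above
    api_base="https://my-tgi-endpoint.example.com",   # placeholder endpoint
)
print(response["choices"][0]["message"]["content"])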

View file

@@ -92,25 +92,6 @@ def test_completion_with_litellm_call_id():
         pytest.fail(f"Error occurred: {e}")

-def test_completion_claude_stream():
-    try:
-        messages = [
-            {"role": "system", "content": "You are a helpful assistant."},
-            {
-                "role": "user",
-                "content": "how does a court case get to the Supreme Court?",
-            },
-        ]
-        response = completion(model="claude-2", messages=messages, stream=True)
-        # Add any assertions here to check the response
-        for chunk in response:
-            print(chunk["choices"][0]["delta"]) # same as openai format
-            print(chunk["choices"][0]["finish_reason"])
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-# test_completion_claude_stream()
 def test_completion_nlp_cloud():
     try:
         messages = [
@@ -125,26 +106,6 @@ def test_completion_nlp_cloud():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

-def test_completion_nlp_cloud_streaming():
-    try:
-        messages = [
-            {"role": "system", "content": "You are a helpful assistant."},
-            {
-                "role": "user",
-                "content": "how does a court case get to the Supreme Court?",
-            },
-        ]
-        response = completion(model="dolphin", messages=messages, stream=True, logger_fn=logger_fn)
-        # Add any assertions here to check the response
-        for chunk in response:
-            print(chunk["choices"][0]["delta"]["content"]) # same as openai format
-            print(chunk["choices"][0]["finish_reason"])
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-# test_completion_nlp_cloud_streaming()
-# test_completion_nlp_cloud_streaming()
 # def test_completion_hf_api():
 #     try:
 #         user_message = "write some code to find the sum of two numbers"
@@ -327,69 +288,6 @@ def test_completion_openai_with_more_optional_params():
         pytest.fail(f"Error occurred: {e}")

-def test_completion_openai_with_stream():
-    try:
-        response = completion(
-            model="gpt-3.5-turbo",
-            messages=messages,
-            temperature=0.5,
-            top_p=0.1,
-            n=2,
-            max_tokens=150,
-            presence_penalty=0.5,
-            stream=True,
-            frequency_penalty=-0.5,
-            logit_bias={27000: 5},
-            user="ishaan_dev@berri.ai",
-        )
-        # Add any assertions here to check the response
-        print(response)
-        for chunk in response:
-            print(chunk)
-            if chunk["choices"][0]["finish_reason"] == "stop" or chunk["choices"][0]["finish_reason"] == "length":
-                break
-            print(chunk["choices"][0]["finish_reason"])
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-# test_completion_openai_with_stream()
-
-def test_completion_openai_with_functions():
-    function1 = [
-        {
-            "name": "get_current_weather",
-            "description": "Get the current weather in a given location",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "location": {
-                        "type": "string",
-                        "description": "The city and state, e.g. San Francisco, CA",
-                    },
-                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
-                },
-                "required": ["location"],
-            },
-        }
-    ]
-    try:
-        response = completion(
-            model="gpt-3.5-turbo", messages=messages, functions=function1, stream=True
-        )
-        # Add any assertions here to check the response
-        print(response)
-        for chunk in response:
-            print(chunk)
-            if chunk["choices"][0]["finish_reason"] == "stop":
-                break
-            print(chunk["choices"][0]["finish_reason"])
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-# test_completion_openai_with_functions()
 # def test_completion_openai_azure_with_functions():
 #     function1 = [
 #         {
@@ -544,20 +442,6 @@ def test_completion_replicate_vicuna():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

-# test_completion_replicate_vicuna()
-
-def test_completion_replicate_llama_stream():
-    model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
-    try:
-        response = completion(model=model_name, messages=messages, stream=True)
-        # Add any assertions here to check the response
-        for chunk in response:
-            print(chunk)
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-# test_completion_replicate_llama_stream()
 # def test_completion_replicate_stability_stream():
 #     model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
 #     try:
@@ -653,26 +537,7 @@ def test_completion_bedrock_ai21():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

-def test_completion_bedrock_ai21_stream():
-    try:
-        litellm.set_verbose = False
-        response = completion(
-            model="bedrock/amazon.titan-tg1-large",
-            messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
-            temperature=1,
-            max_tokens=4096,
-            stream=True,
-        )
-        # Add any assertions here to check the response
-        print(response)
-        for chunk in response:
-            print(chunk)
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-# test_completion_bedrock_ai21_stream()
-# test_completion_sagemaker()
 ######## Test VLLM ########
 # def test_completion_vllm():
 #     try:

View file

@@ -213,7 +213,31 @@ def test_completion_cohere_stream():
         print(f"completion_response: {complete_response}")
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

+def test_completion_bedrock_ai21_stream():
+    try:
+        litellm.set_verbose = False
+        response = completion(
+            model="bedrock/amazon.titan-tg1-large",
+            messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
+            temperature=1,
+            max_tokens=4096,
+            stream=True,
+        )
+        # Add any assertions here to check the response
+        print(response)
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
 # test_completion_cohere_stream()
 # test on openai completion call
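
The relocated streaming tests call a streaming_format_tests(idx, chunk) helper that already lives in test_streaming.py and is not shown in this diff. Purely as an illustration of the kind of per-chunk check such a helper performs, a hypothetical version might look like this (an assumption, not the repository's actual implementation):

def streaming_format_tests(idx, chunk):
    # Hypothetical sketch: validate the OpenAI-style streaming chunk shape
    # and return (extracted_text, finished).
    extracted_chunk = ""
    finished = False
    assert "choices" in chunk and len(chunk["choices"]) > 0
    delta = chunk["choices"][0]["delta"]
    if idx == 0:
        # the first chunk is expected to carry the assistant role
        # (see the CustomStreamWrapper sent_first_chunk logic further down)
        assert delta["role"] == "assistant"
    if len(delta.keys()) > 0 and "content" in delta:
        extracted_chunk = delta["content"] or ""
    if chunk["choices"][0]["finish_reason"] is not None:
        finished = True
    return extracted_chunk, finished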
@@ -301,34 +325,66 @@ def test_together_ai_completion_call_starcoder():
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass

-# test_together_ai_completion_call_starcoder()
-# test on aleph alpha completion call - commented out as it's expensive to run this on circle ci for every build
-# def test_aleph_alpha_call():
-#     try:
-#         start_time = time.time()
-#         response = completion(
-#             model="luminous-base",
-#             messages=messages,
-#             logger_fn=logger_fn,
-#             stream=True,
-#         )
-#         complete_response = ""
-#         print(f"returned response object: {response}")
-#         for chunk in response:
-#             chunk_time = time.time()
-#             complete_response += (
-#                 chunk["choices"][0]["delta"]["content"]
-#                 if len(chunk["choices"][0]["delta"].keys()) > 0
-#                 else ""
-#             )
-#             if len(complete_response) > 0:
-#                 print(complete_response)
-#         if complete_response == "":
-#             raise Exception("Empty response received")
-#     except:
-#         print(f"error occurred: {traceback.format_exc()}")
-#         pass
-#### Test Async streaming
+def test_completion_nlp_cloud_streaming():
+    try:
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {
+                "role": "user",
+                "content": "how does a court case get to the Supreme Court?",
+            },
+        ]
+        response = completion(model="dolphin", messages=messages, stream=True, logger_fn=logger_fn)
+        # Add any assertions here to check the response
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+        if complete_response == "":
+            raise Exception("Empty response received")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+#### Test Function calling + streaming ####
+
+def test_completion_openai_with_functions():
+    function1 = [
+        {
+            "name": "get_current_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA",
+                    },
+                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                },
+                "required": ["location"],
+            },
+        }
+    ]
+    try:
+        response = completion(
+            model="gpt-3.5-turbo", messages=messages, functions=function1, stream=True
+        )
+        # Add any assertions here to check the response
+        print(response)
+        for chunk in response:
+            print(chunk)
+            if chunk["choices"][0]["finish_reason"] == "stop":
+                break
+            print(chunk["choices"][0]["finish_reason"])
+            print(chunk["choices"][0]["delta"]["content"])
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+test_completion_openai_with_functions()
+
+#### Test Async streaming ####
 # # test on ai21 completion call
 async def ai21_async_completion_call():

View file

@@ -828,6 +828,7 @@ def get_optional_params( # use the openai defaults
     model=None,
     custom_llm_provider="",
     top_k=40,
+    return_full_text=False,
     task=None
 ):
     optional_params = {}
@@ -885,6 +886,7 @@ def get_optional_params( # use the openai defaults
             optional_params["max_new_tokens"] = max_tokens
         if presence_penalty != 0:
             optional_params["repetition_penalty"] = presence_penalty
+        optional_params["return_full_text"] = return_full_text
         optional_params["details"] = True
         optional_params["task"] = task
     elif custom_llm_provider == "together_ai" or ("togethercomputer" in model):
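
To make the effect of the hunk above concrete: for a huggingface provider call, get_optional_params now includes return_full_text in the generation-parameter dict it builds, alongside the parameters already visible in the surrounding context. Roughly, with placeholder values for whatever the caller passed:

# Sketch of the resulting optional_params for a Hugging Face TGI request
# after this change (temperature/top_p values are placeholders):
optional_params = {
    "temperature": 0.7,
    "top_p": 0.9,
    "max_new_tokens": 256,
    "return_full_text": False,  # new: forwarded from completion()'s kwarg
    "details": True,
    "task": "text-generation-inference",
}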
@@ -2507,7 +2509,6 @@ class CustomStreamWrapper:
         model_response = ModelResponse(stream=True, model=self.model)
         try:
             # return this for all models
-            print_verbose(f"self.sent_first_chunk: {self.sent_first_chunk}")
             if self.sent_first_chunk == False:
                 model_response.choices[0].delta.role = "assistant"
                 self.sent_first_chunk = True
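
This hunk only drops a debug print; the surrounding sent_first_chunk logic still stamps the assistant role on the first streamed chunk only. A stripped-down sketch of that pattern for context (illustrative only, not the full wrapper, which also extracts provider-specific text):

class StreamWrapperSketch:
    def __init__(self, model):
        self.model = model
        self.sent_first_chunk = False

    def wrap_chunk(self, text):
        chunk = {"choices": [{"delta": {"content": text}}]}
        if not self.sent_first_chunk:
            # only the very first chunk announces the assistant role
            chunk["choices"][0]["delta"]["role"] = "assistant"
            self.sent_first_chunk = True
        return chunk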

View file

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.677"
+version = "0.1.678"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"