Mirror of https://github.com/BerriAI/litellm.git
Synced 2025-04-26 03:04:13 +00:00

fixes to testing

Commit: ce827faa93
Parent: 09d63a6e73
7 changed files with 91 additions and 167 deletions
Binary file not shown.
Binary file not shown.
@@ -132,6 +132,7 @@ def completion(
     # model specific optional params
     top_k=40,# used by text-bison only
     task: Optional[str]="text-generation-inference", # used by huggingface inference endpoints
+    return_full_text: bool = False, # used by huggingface TGI
     remove_input: bool = True, # used by nlp cloud models - prevents input text from being returned as part of output
     request_timeout=0, # unused var for old version of OpenAI API
     fallbacks=[],
@@ -181,7 +182,8 @@ def completion(
         custom_llm_provider=custom_llm_provider,
         top_k=top_k,
         task=task,
-        remove_input=remove_input
+        remove_input=remove_input,
+        return_full_text=return_full_text
     )
     # For logging - save the values of the litellm-specific params passed in
     litellm_params = get_litellm_params(

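For context, a minimal usage sketch of the new parameter (the model name and prompt are illustrative placeholders, not taken from this commit):

import litellm

# With return_full_text=False (the new default), a Hugging Face TGI response
# should contain only the generated continuation, not the echoed prompt.
response = litellm.completion(
    model="huggingface/bigcode/starcoder",  # placeholder model id (assumption)
    messages=[{"role": "user", "content": "def add(a, b):"}],
    return_full_text=False,
)
print(response["choices"][0]["message"]["content"])
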
@@ -92,25 +92,6 @@ def test_completion_with_litellm_call_id():
         pytest.fail(f"Error occurred: {e}")
 
 
-def test_completion_claude_stream():
-    try:
-        messages = [
-            {"role": "system", "content": "You are a helpful assistant."},
-            {
-                "role": "user",
-                "content": "how does a court case get to the Supreme Court?",
-            },
-        ]
-        response = completion(model="claude-2", messages=messages, stream=True)
-        # Add any assertions here to check the response
-        for chunk in response:
-            print(chunk["choices"][0]["delta"]) # same as openai format
-            print(chunk["choices"][0]["finish_reason"])
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-# test_completion_claude_stream()
-
 def test_completion_nlp_cloud():
     try:
         messages = [
@@ -125,26 +106,6 @@ def test_completion_nlp_cloud():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-def test_completion_nlp_cloud_streaming():
-    try:
-        messages = [
-            {"role": "system", "content": "You are a helpful assistant."},
-            {
-                "role": "user",
-                "content": "how does a court case get to the Supreme Court?",
-            },
-        ]
-        response = completion(model="dolphin", messages=messages, stream=True, logger_fn=logger_fn)
-        # Add any assertions here to check the response
-        for chunk in response:
-            print(chunk["choices"][0]["delta"]["content"]) # same as openai format
-            print(chunk["choices"][0]["finish_reason"])
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-# test_completion_nlp_cloud_streaming()
-
-# test_completion_nlp_cloud_streaming()
 # def test_completion_hf_api():
 #     try:
 #         user_message = "write some code to find the sum of two numbers"
@@ -327,69 +288,6 @@ def test_completion_openai_with_more_optional_params():
         pytest.fail(f"Error occurred: {e}")
 
 
-def test_completion_openai_with_stream():
-    try:
-        response = completion(
-            model="gpt-3.5-turbo",
-            messages=messages,
-            temperature=0.5,
-            top_p=0.1,
-            n=2,
-            max_tokens=150,
-            presence_penalty=0.5,
-            stream=True,
-            frequency_penalty=-0.5,
-            logit_bias={27000: 5},
-            user="ishaan_dev@berri.ai",
-        )
-        # Add any assertions here to check the response
-        print(response)
-        for chunk in response:
-            print(chunk)
-            if chunk["choices"][0]["finish_reason"] == "stop" or chunk["choices"][0]["finish_reason"] == "length":
-                break
-            print(chunk["choices"][0]["finish_reason"])
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-# test_completion_openai_with_stream()
-
-def test_completion_openai_with_functions():
-    function1 = [
-        {
-            "name": "get_current_weather",
-            "description": "Get the current weather in a given location",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "location": {
-                        "type": "string",
-                        "description": "The city and state, e.g. San Francisco, CA",
-                    },
-                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
-                },
-                "required": ["location"],
-            },
-        }
-    ]
-    try:
-        response = completion(
-            model="gpt-3.5-turbo", messages=messages, functions=function1, stream=True
-        )
-        # Add any assertions here to check the response
-        print(response)
-        for chunk in response:
-            print(chunk)
-            if chunk["choices"][0]["finish_reason"] == "stop":
-                break
-            print(chunk["choices"][0]["finish_reason"])
-            print(chunk["choices"][0]["delta"]["content"])
-
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-# test_completion_openai_with_functions()
-
-
 # def test_completion_openai_azure_with_functions():
 #     function1 = [
 #         {
@@ -544,20 +442,6 @@ def test_completion_replicate_vicuna():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-# test_completion_replicate_vicuna()
-
-def test_completion_replicate_llama_stream():
-    model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
-    try:
-        response = completion(model=model_name, messages=messages, stream=True)
-        # Add any assertions here to check the response
-        for chunk in response:
-            print(chunk)
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-# test_completion_replicate_llama_stream()
-
 # def test_completion_replicate_stability_stream():
 #     model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
 #     try:
@@ -653,26 +537,7 @@ def test_completion_bedrock_ai21():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-def test_completion_bedrock_ai21_stream():
-    try:
-        litellm.set_verbose = False
-        response = completion(
-            model="bedrock/amazon.titan-tg1-large",
-            messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
-            temperature=1,
-            max_tokens=4096,
-            stream=True,
-        )
-        # Add any assertions here to check the response
-        print(response)
-        for chunk in response:
-            print(chunk)
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-# test_completion_bedrock_ai21_stream()
-
 # test_completion_sagemaker()
 ######## Test VLLM ########
 # def test_completion_vllm():
 #     try:

@@ -214,6 +214,30 @@ def test_completion_cohere_stream():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
+
+def test_completion_bedrock_ai21_stream():
+    try:
+        litellm.set_verbose = False
+        response = completion(
+            model="bedrock/amazon.titan-tg1-large",
+            messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
+            temperature=1,
+            max_tokens=4096,
+            stream=True,
+        )
+        # Add any assertions here to check the response
+        print(response)
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
 # test_completion_cohere_stream()
 
 # test on openai completion call
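The relocated streaming tests call a streaming_format_tests helper defined elsewhere in the test file. A rough sketch of what such a helper typically verifies for OpenAI-format chunks is shown below; the body is an assumption for illustration, not the repository's actual implementation.

def streaming_format_tests(idx, chunk):
    # Sketch (assumption): validate one OpenAI-format streaming chunk and
    # return (extracted_text, finished), mirroring how the tests use it.
    extracted_chunk = ""
    finished = False
    assert "choices" in chunk and len(chunk["choices"]) > 0
    delta = chunk["choices"][0]["delta"]
    if idx == 0:
        # the first chunk is expected to carry the assistant role
        assert delta["role"] == "assistant"
    if chunk["choices"][0]["finish_reason"] is not None:
        finished = True
    if "content" in delta and delta["content"] is not None:
        extracted_chunk = delta["content"]
    return extracted_chunk, finished
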
@@ -301,34 +325,66 @@ def test_together_ai_completion_call_starcoder():
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass
-# test_together_ai_completion_call_starcoder()
-# test on aleph alpha completion call - commented out as it's expensive to run this on circle ci for every build
-# def test_aleph_alpha_call():
-#     try:
-#         start_time = time.time()
-#         response = completion(
-#             model="luminous-base",
-#             messages=messages,
-#             logger_fn=logger_fn,
-#             stream=True,
-#         )
-#         complete_response = ""
-#         print(f"returned response object: {response}")
-#         for chunk in response:
-#             chunk_time = time.time()
-#             complete_response += (
-#                 chunk["choices"][0]["delta"]["content"]
-#                 if len(chunk["choices"][0]["delta"].keys()) > 0
-#                 else ""
-#             )
-#         if len(complete_response) > 0:
-#             print(complete_response)
-#         if complete_response == "":
-#             raise Exception("Empty response received")
-#     except:
-#         print(f"error occurred: {traceback.format_exc()}")
-#         pass
-#### Test Async streaming
+def test_completion_nlp_cloud_streaming():
+    try:
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {
+                "role": "user",
+                "content": "how does a court case get to the Supreme Court?",
+            },
+        ]
+        response = completion(model="dolphin", messages=messages, stream=True, logger_fn=logger_fn)
+        # Add any assertions here to check the response
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+        if complete_response == "":
+            raise Exception("Empty response received")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
+#### Test Function calling + streaming ####
+
+def test_completion_openai_with_functions():
+    function1 = [
+        {
+            "name": "get_current_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA",
+                    },
+                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                },
+                "required": ["location"],
+            },
+        }
+    ]
+    try:
+        response = completion(
+            model="gpt-3.5-turbo", messages=messages, functions=function1, stream=True
+        )
+        # Add any assertions here to check the response
+        print(response)
+        for chunk in response:
+            print(chunk)
+            if chunk["choices"][0]["finish_reason"] == "stop":
+                break
+            print(chunk["choices"][0]["finish_reason"])
+            print(chunk["choices"][0]["delta"]["content"])
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+test_completion_openai_with_functions()
+
+#### Test Async streaming ####
+
 # # test on ai21 completion call
 async def ai21_async_completion_call():

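The relocated function-calling test only prints chunks. As a side note, streamed function-call output from OpenAI-format chunks is usually accumulated from the delta's "function_call" field; the loop below is an illustrative sketch under that assumption, not part of this commit.

# Sketch (assumption): collect the arguments of a streamed function call
# from response = completion(..., functions=function1, stream=True).
arguments = ""
for chunk in response:
    choice = chunk["choices"][0]
    delta = choice["delta"]
    if "function_call" in delta and delta["function_call"] is not None:
        arguments += delta["function_call"].get("arguments", "")
    if choice["finish_reason"] in ("stop", "function_call"):
        break
print(arguments)  # the complete JSON arguments string once streaming ends
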
@@ -828,6 +828,7 @@ def get_optional_params( # use the openai defaults
     model=None,
     custom_llm_provider="",
     top_k=40,
+    return_full_text=False,
     task=None
 ):
     optional_params = {}
@@ -885,6 +886,7 @@ def get_optional_params( # use the openai defaults
             optional_params["max_new_tokens"] = max_tokens
         if presence_penalty != 0:
             optional_params["repetition_penalty"] = presence_penalty
+        optional_params["return_full_text"] = return_full_text
         optional_params["details"] = True
         optional_params["task"] = task
     elif custom_llm_provider == "together_ai" or ("togethercomputer" in model):
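For reference, the huggingface branch above forwards return_full_text (alongside details and max_new_tokens) as generation parameters. A simplified request sketch follows; the endpoint URL and prompt are placeholders, and the payload shape reflects the public text-generation-inference API rather than code from this commit.

import requests

optional_params = {"return_full_text": False, "details": True, "max_new_tokens": 256}
payload = {
    "inputs": "how does a court case get to the Supreme Court?",
    "parameters": optional_params,
}
# Placeholder endpoint; a real call would need a running TGI server.
# resp = requests.post("https://<your-tgi-endpoint>/generate", json=payload, timeout=60)
# print(resp.json()["generated_text"])
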
@@ -2507,7 +2509,6 @@ class CustomStreamWrapper:
         model_response = ModelResponse(stream=True, model=self.model)
         try:
             # return this for all models
-            print_verbose(f"self.sent_first_chunk: {self.sent_first_chunk}")
             if self.sent_first_chunk == False:
                 model_response.choices[0].delta.role = "assistant"
                 self.sent_first_chunk = True

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.677"
+version = "0.1.678"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"