Mirror of https://github.com/BerriAI/litellm.git
fixes to testing
This commit is contained in:
parent 09d63a6e73
commit ce827faa93
7 changed files with 91 additions and 167 deletions
Binary file not shown.
Binary file not shown.
@@ -132,6 +132,7 @@ def completion(
    # model specific optional params
    top_k=40, # used by text-bison only
    task: Optional[str]="text-generation-inference", # used by huggingface inference endpoints
    return_full_text: bool = False, # used by huggingface TGI
    remove_input: bool = True, # used by nlp cloud models - prevents input text from being returned as part of output
    request_timeout=0, # unused var for old version of OpenAI API
    fallbacks=[],
@@ -181,7 +182,8 @@ def completion(
        custom_llm_provider=custom_llm_provider,
        top_k=top_k,
        task=task,
        remove_input=remove_input
        remove_input=remove_input,
        return_full_text=return_full_text
    )
    # For logging - save the values of the litellm-specific params passed in
    litellm_params = get_litellm_params(
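For orientation, the two hunks above thread new model-specific optional params (`return_full_text`, `task`, alongside the existing `top_k` and `remove_input`) through `completion()`. A minimal sketch of how a caller might pass them; the model name and values are illustrative assumptions, not taken from this commit:

```python
# Illustrative only: exercises the model-specific kwargs shown in the hunks above.
# The Hugging Face model name is a placeholder assumption; top_k / remove_input
# are analogous pass-throughs for text-bison / NLP Cloud models.
from litellm import completion

response = completion(
    model="huggingface/bigcode/starcoder",  # hypothetical HF TGI endpoint
    messages=[{"role": "user", "content": "Say hi"}],
    task="text-generation-inference",  # used by huggingface inference endpoints
    return_full_text=False,            # used by huggingface TGI
)
print(response["choices"][0]["message"]["content"])
```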
@@ -92,25 +92,6 @@ def test_completion_with_litellm_call_id():
        pytest.fail(f"Error occurred: {e}")


def test_completion_claude_stream():
    try:
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": "how does a court case get to the Supreme Court?",
            },
        ]
        response = completion(model="claude-2", messages=messages, stream=True)
        # Add any assertions here to check the response
        for chunk in response:
            print(chunk["choices"][0]["delta"]) # same as openai format
            print(chunk["choices"][0]["finish_reason"])
            print(chunk["choices"][0]["delta"]["content"])
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_completion_claude_stream()

def test_completion_nlp_cloud():
    try:
        messages = [
@@ -125,26 +106,6 @@ def test_completion_nlp_cloud():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

def test_completion_nlp_cloud_streaming():
    try:
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": "how does a court case get to the Supreme Court?",
            },
        ]
        response = completion(model="dolphin", messages=messages, stream=True, logger_fn=logger_fn)
        # Add any assertions here to check the response
        for chunk in response:
            print(chunk["choices"][0]["delta"]["content"]) # same as openai format
            print(chunk["choices"][0]["finish_reason"])
            print(chunk["choices"][0]["delta"]["content"])
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_completion_nlp_cloud_streaming()

# test_completion_nlp_cloud_streaming()
# def test_completion_hf_api():
#     try:
#         user_message = "write some code to find the sum of two numbers"
@@ -327,69 +288,6 @@ def test_completion_openai_with_more_optional_params():
        pytest.fail(f"Error occurred: {e}")


def test_completion_openai_with_stream():
    try:
        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.5,
            top_p=0.1,
            n=2,
            max_tokens=150,
            presence_penalty=0.5,
            stream=True,
            frequency_penalty=-0.5,
            logit_bias={27000: 5},
            user="ishaan_dev@berri.ai",
        )
        # Add any assertions here to check the response
        print(response)
        for chunk in response:
            print(chunk)
            if chunk["choices"][0]["finish_reason"] == "stop" or chunk["choices"][0]["finish_reason"] == "length":
                break
            print(chunk["choices"][0]["finish_reason"])
            print(chunk["choices"][0]["delta"]["content"])
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_completion_openai_with_stream()

def test_completion_openai_with_functions():
    function1 = [
        {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        }
    ]
    try:
        response = completion(
            model="gpt-3.5-turbo", messages=messages, functions=function1, stream=True
        )
        # Add any assertions here to check the response
        print(response)
        for chunk in response:
            print(chunk)
            if chunk["choices"][0]["finish_reason"] == "stop":
                break
            print(chunk["choices"][0]["finish_reason"])
            print(chunk["choices"][0]["delta"]["content"])

    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_completion_openai_with_functions()


# def test_completion_openai_azure_with_functions():
#     function1 = [
#         {
@@ -544,20 +442,6 @@ def test_completion_replicate_vicuna():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_replicate_vicuna()

def test_completion_replicate_llama_stream():
    model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
    try:
        response = completion(model=model_name, messages=messages, stream=True)
        # Add any assertions here to check the response
        for chunk in response:
            print(chunk)
            print(chunk["choices"][0]["delta"]["content"])
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_completion_replicate_llama_stream()

# def test_completion_replicate_stability_stream():
#     model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
#     try:
@@ -653,26 +537,7 @@ def test_completion_bedrock_ai21():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

def test_completion_bedrock_ai21_stream():
    try:
        litellm.set_verbose = False
        response = completion(
            model="bedrock/amazon.titan-tg1-large",
            messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
            temperature=1,
            max_tokens=4096,
            stream=True,
        )
        # Add any assertions here to check the response
        print(response)
        for chunk in response:
            print(chunk)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_ai21_stream()


# test_completion_sagemaker()
######## Test VLLM ########
# def test_completion_vllm():
#     try:
@@ -213,7 +213,31 @@ def test_completion_cohere_stream():
        print(f"completion_response: {complete_response}")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_bedrock_ai21_stream():
    try:
        litellm.set_verbose = False
        response = completion(
            model="bedrock/amazon.titan-tg1-large",
            messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
            temperature=1,
            max_tokens=4096,
            stream=True,
        )
        # Add any assertions here to check the response
        print(response)
        for idx, chunk in enumerate(response):
            chunk, finished = streaming_format_tests(idx, chunk)
            if finished:
                break
            complete_response += chunk
        if complete_response.strip() == "":
            raise Exception("Empty response received")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_cohere_stream()

# test on openai completion call
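The test added above relies on a `streaming_format_tests(idx, chunk)` helper defined elsewhere in the test module. As a rough, hypothetical sketch of the call shape it appears to have (validate a chunk, return the extracted text plus a finished flag); this is an assumption for illustration, not the repo's actual implementation:

```python
# Hypothetical sketch only: mirrors the (idx, chunk) -> (text, finished) call shape
# used above, assuming OpenAI-style dict chunks; the repo's real helper may differ.
def streaming_format_tests_sketch(idx, chunk):
    choice = chunk["choices"][0]
    delta = choice.get("delta", {})
    if idx == 0:
        # the first streamed chunk is expected to carry the assistant role
        assert delta.get("role") == "assistant"
    finished = choice.get("finish_reason") is not None
    extracted = delta.get("content") or ""
    return extracted, finished
```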
@@ -301,34 +325,66 @@ def test_together_ai_completion_call_starcoder():
    except:
        print(f"error occurred: {traceback.format_exc()}")
        pass
# test_together_ai_completion_call_starcoder()
# test on aleph alpha completion call - commented out as it's expensive to run this on circle ci for every build
# def test_aleph_alpha_call():
#     try:
#         start_time = time.time()
#         response = completion(
#             model="luminous-base",
#             messages=messages,
#             logger_fn=logger_fn,
#             stream=True,
#         )
#         complete_response = ""
#         print(f"returned response object: {response}")
#         for chunk in response:
#             chunk_time = time.time()
#             complete_response += (
#                 chunk["choices"][0]["delta"]["content"]
#                 if len(chunk["choices"][0]["delta"].keys()) > 0
#                 else ""
#             )
#             if len(complete_response) > 0:
#                 print(complete_response)
#         if complete_response == "":
#             raise Exception("Empty response received")
#     except:
#         print(f"error occurred: {traceback.format_exc()}")
#         pass
#### Test Async streaming

def test_completion_nlp_cloud_streaming():
    try:
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": "how does a court case get to the Supreme Court?",
            },
        ]
        response = completion(model="dolphin", messages=messages, stream=True, logger_fn=logger_fn)
        # Add any assertions here to check the response
        for idx, chunk in enumerate(response):
            chunk, finished = streaming_format_tests(idx, chunk)
            if finished:
                break
            complete_response += chunk
        if complete_response == "":
            raise Exception("Empty response received")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


#### Test Function calling + streaming ####

def test_completion_openai_with_functions():
    function1 = [
        {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        }
    ]
    try:
        response = completion(
            model="gpt-3.5-turbo", messages=messages, functions=function1, stream=True
        )
        # Add any assertions here to check the response
        print(response)
        for chunk in response:
            print(chunk)
            if chunk["choices"][0]["finish_reason"] == "stop":
                break
            print(chunk["choices"][0]["finish_reason"])
            print(chunk["choices"][0]["delta"]["content"])
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
test_completion_openai_with_functions()

#### Test Async streaming ####

# # test on ai21 completion call
async def ai21_async_completion_call():
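The function-calling streaming test moved here only prints chunks. For readers unfamiliar with the format, a small illustrative sketch of how streamed function-call arguments could be reassembled from OpenAI-format chunks; this helper is hypothetical, assumes dict-style chunks, and is not part of this commit:

```python
# Hypothetical sketch: reassemble a streamed function call from OpenAI-format
# dict chunks (the pre-tool_calls "functions" API); not part of this commit.
def collect_function_call(response):
    name, arguments = None, ""
    for chunk in response:
        choice = chunk["choices"][0]
        fc = choice.get("delta", {}).get("function_call")
        if fc:
            name = fc.get("name") or name
            arguments += fc.get("arguments") or ""
        if choice.get("finish_reason") is not None:
            break
    return name, arguments
```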
@@ -828,6 +828,7 @@ def get_optional_params( # use the openai defaults
    model=None,
    custom_llm_provider="",
    top_k=40,
    return_full_text=False,
    task=None
):
    optional_params = {}
@@ -885,6 +886,7 @@ def get_optional_params( # use the openai defaults
            optional_params["max_new_tokens"] = max_tokens
        if presence_penalty != 0:
            optional_params["repetition_penalty"] = presence_penalty
        optional_params["return_full_text"] = return_full_text
        optional_params["details"] = True
        optional_params["task"] = task
    elif custom_llm_provider == "together_ai" or ("togethercomputer" in model):
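To make the Hugging Face branch above concrete, this is roughly what the mapping shown would produce for a caller passing max_tokens=100, presence_penalty=0.5, return_full_text=False, and task="text-generation-inference" (illustrative values only, not from the diff):

```python
# Illustrative only: approximate result of the huggingface mapping shown above.
optional_params = {
    "max_new_tokens": 100,           # from max_tokens
    "repetition_penalty": 0.5,       # from presence_penalty
    "return_full_text": False,
    "details": True,                 # always set in this provider branch
    "task": "text-generation-inference",
}
```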
@@ -2507,7 +2509,6 @@ class CustomStreamWrapper:
        model_response = ModelResponse(stream=True, model=self.model)
        try:
            # return this for all models
            print_verbose(f"self.sent_first_chunk: {self.sent_first_chunk}")
            if self.sent_first_chunk == False:
                model_response.choices[0].delta.role = "assistant"
                self.sent_first_chunk = True
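As context for the `sent_first_chunk` logic above: only the first streamed chunk should carry the assistant role in its delta, matching what the streaming tests assert. A rough sketch of the expected OpenAI-style chunk shapes (illustrative, not taken from the diff):

```python
# Illustrative only: expected shape of streamed chunks with the change above.
first_chunk = {
    "choices": [{"delta": {"role": "assistant", "content": "Hello"}, "finish_reason": None}]
}
later_chunk = {
    "choices": [{"delta": {"content": " world"}, "finish_reason": None}]
}
```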
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "0.1.677"
version = "0.1.678"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT License"