Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 19:24:27 +00:00)
(test) fix streaming test - running into rate limits, set max_tokens

parent 098a86f678
commit b9d249ba32

1 changed file with 13 additions and 11 deletions
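
The change itself is mechanical: streaming tests that hit live endpoints now pass max_tokens so each run consumes fewer tokens and is less likely to trip provider rate limits, and two module-level test invocations are commented out. Below is a minimal sketch of the capped-streaming pattern these tests follow; `messages` stands in for a fixture defined elsewhere in the test module, and the exact chunk layout (OpenAI-style deltas) is an assumption, not something this diff confirms.

import litellm

messages = [{"role": "user", "content": "hello"}]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=messages,
    stream=True,
    max_tokens=10,  # cap the completion so repeated CI runs stay under rate limits
)

complete_response = ""
for chunk in response:
    # Streamed chunks carry incremental deltas; content can be missing on
    # role-only or final chunks (chunk shape assumed OpenAI-compatible).
    delta = chunk["choices"][0]["delta"]
    complete_response += delta.get("content") or ""

print(f"completion_response: {complete_response}")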
@@ -219,7 +219,7 @@ def test_completion_cohere_stream():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-test_completion_cohere_stream()
+# test_completion_cohere_stream()
 
 def test_completion_cohere_stream_bad_key():
     try:
@@ -453,7 +453,7 @@ def test_completion_palm_stream():
         print(f"completion_response: {complete_response}")
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-test_completion_palm_stream()
+# test_completion_palm_stream()
 
 # def test_completion_deep_infra_stream():
 #     # deep infra currently includes role in the 2nd chunk
@@ -842,7 +842,7 @@ def ai21_completion_call_bad_key():
 def test_openai_chat_completion_call():
     try:
         response = completion(
-            model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn
+            model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn, max_tokens=10
         )
         complete_response = ""
         start_time = time.time()
@@ -943,7 +943,7 @@ def test_completion_openai_with_functions():
     ]
     try:
         response = completion(
-            model="gpt-3.5-turbo", messages=messages, functions=function1, stream=True
+            model="gpt-3.5-turbo", messages=messages, functions=function1, stream=True,
         )
         # Add any assertions here to check the response
         print(response)
@@ -987,7 +987,7 @@ async def ai21_async_completion_call():
 async def completion_call():
     try:
         response = completion(
-            model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn
+            model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn, max_tokens=10
         )
         print(f"response: {response}")
         complete_response = ""
@@ -1260,7 +1260,7 @@ def test_openai_streaming_and_function_calling():
     messages=[{"role": "user", "content": "What is the weather like in Boston?"}]
     try:
         response = completion(
-            model="gpt-3.5-turbo", functions=function1, messages=messages, stream=True
+            model="gpt-3.5-turbo", functions=function1, messages=messages, stream=True,
         )
         # Add any assertions here to check the response
         for idx, chunk in enumerate(response):
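
The two function-calling hunks only gain a trailing comma, but they cover the trickiest streaming path: when stream=True is combined with functions, the function-call arguments arrive spread across chunks. A hedged sketch of accumulating them follows; the function1 schema and the delta["function_call"] layout are assumptions based on the OpenAI streaming format, not taken from this diff.

import json
import litellm

# Hypothetical schema standing in for the test module's `function1`.
function1 = [
    {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "City name, e.g. Boston"}
            },
            "required": ["location"],
        },
    }
]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What is the weather like in Boston?"}],
    functions=function1,
    stream=True,
)

arguments = ""
for chunk in response:
    delta = chunk["choices"][0]["delta"]
    # Argument JSON streams in fragments; concatenate, then parse at the end.
    function_call = delta.get("function_call")
    if function_call and function_call.get("arguments"):
        arguments += function_call["arguments"]

if arguments:
    print(json.loads(arguments))  # e.g. {"location": "Boston"}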
@@ -1270,8 +1270,6 @@ def test_openai_streaming_and_function_calling():
         raise e
 
 # test_openai_streaming_and_function_calling()
-import litellm
-
 
 def test_success_callback_streaming():
     def success_callback(kwargs, completion_response, start_time, end_time):
@@ -1289,12 +1287,16 @@ def test_success_callback_streaming():
     litellm.success_callback = [success_callback]
 
     messages = [{"role": "user", "content": "hello"}]
-
-    response = litellm.completion(model="gpt-3.5-turbo", messages=messages, stream=True)
+    print("TESTING LITELLM COMPLETION CALL")
+    response = litellm.completion(
+        model="j2-light",
+        messages=messages, stream=True,
+        max_tokens=5,
+    )
     print(response)
 
 
     for chunk in response:
         print(chunk["choices"][0])
 
-test_success_callback_streaming()
+# test_success_callback_streaming()
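
The last hunk also switches test_success_callback_streaming from gpt-3.5-turbo to a short j2-light completion and leaves the call commented out at module level. For context, litellm's success-callback hook works by assigning a list of callables to litellm.success_callback; the signature below is the one visible in the diff, while the payload contents and timing semantics are assumptions.

import litellm

def success_callback(kwargs, completion_response, start_time, end_time):
    # Invoked by litellm after a completion finishes; for streamed calls the
    # chunks are reassembled first (payload details assumed, not shown here).
    print(f"callback fired after {end_time - start_time}: {completion_response}")

litellm.success_callback = [success_callback]

messages = [{"role": "user", "content": "hello"}]
response = litellm.completion(
    model="j2-light",  # AI21 model used by the updated test; needs AI21_API_KEY
    messages=messages,
    stream=True,
    max_tokens=5,
)
for chunk in response:
    print(chunk["choices"][0])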