Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)

ensure streaming format is exactly the same as openai

This commit is contained in:
  parent ebd4688fec
  commit 21cd55ab26

6 changed files with 275 additions and 169 deletions
@@ -24,6 +24,170 @@ def logger_fn(model_call_object: dict):
 user_message = "Hello, how are you?"
 messages = [{"content": user_message, "role": "user"}]
 
+
+first_openai_chunk_example = {
+    "id": "chatcmpl-7zSKLBVXnX9dwgRuDYVqVVDsgh2yp",
+    "object": "chat.completion.chunk",
+    "created": 1694881253,
+    "model": "gpt-4-0613",
+    "choices": [
+        {
+            "index": 0,
+            "delta": {
+                "role": "assistant",
+                "content": ""
+            },
+            "finish_reason": None  # it's null
+        }
+    ]
+}
+
+def validate_first_format(chunk):
+    # write a test to make sure chunk follows the same format as first_openai_chunk_example
+    assert isinstance(chunk, dict), "Chunk should be a dictionary."
+    assert "id" in chunk, "Chunk should have an 'id'."
+    assert isinstance(chunk['id'], str), "'id' should be a string."
+
+    assert "object" in chunk, "Chunk should have an 'object'."
+    assert isinstance(chunk['object'], str), "'object' should be a string."
+
+    assert "created" in chunk, "Chunk should have a 'created'."
+    assert isinstance(chunk['created'], int), "'created' should be an integer."
+
+    assert "model" in chunk, "Chunk should have a 'model'."
+    assert isinstance(chunk['model'], str), "'model' should be a string."
+
+    assert "choices" in chunk, "Chunk should have 'choices'."
+    assert isinstance(chunk['choices'], list), "'choices' should be a list."
+
+    for choice in chunk['choices']:
+        assert isinstance(choice, dict), "Each choice should be a dictionary."
+
+        assert "index" in choice, "Each choice should have 'index'."
+        assert isinstance(choice['index'], int), "'index' should be an integer."
+
+        assert "delta" in choice, "Each choice should have 'delta'."
+        assert isinstance(choice['delta'], dict), "'delta' should be a dictionary."
+
+        assert "role" in choice['delta'], "'delta' should have a 'role'."
+        assert isinstance(choice['delta']['role'], str), "'role' should be a string."
+
+        assert "content" in choice['delta'], "'delta' should have 'content'."
+        assert isinstance(choice['delta']['content'], str), "'content' should be a string."
+
+        assert "finish_reason" in choice, "Each choice should have 'finish_reason'."
+        assert (choice['finish_reason'] is None) or isinstance(choice['finish_reason'], str), "'finish_reason' should be None or a string."
+
+second_openai_chunk_example = {
+    "id": "chatcmpl-7zSKLBVXnX9dwgRuDYVqVVDsgh2yp",
+    "object": "chat.completion.chunk",
+    "created": 1694881253,
+    "model": "gpt-4-0613",
+    "choices": [
+        {
+            "index": 0,
+            "delta": {
+                "content": "Hello"
+            },
+            "finish_reason": None  # it's null
+        }
+    ]
+}
+
+def validate_second_format(chunk):
+    assert isinstance(chunk, dict), "Chunk should be a dictionary."
+    assert "id" in chunk, "Chunk should have an 'id'."
+    assert isinstance(chunk['id'], str), "'id' should be a string."
+
+    assert "object" in chunk, "Chunk should have an 'object'."
+    assert isinstance(chunk['object'], str), "'object' should be a string."
+
+    assert "created" in chunk, "Chunk should have a 'created'."
+    assert isinstance(chunk['created'], int), "'created' should be an integer."
+
+    assert "model" in chunk, "Chunk should have a 'model'."
+    assert isinstance(chunk['model'], str), "'model' should be a string."
+
+    assert "choices" in chunk, "Chunk should have 'choices'."
+    assert isinstance(chunk['choices'], list), "'choices' should be a list."
+
+    for choice in chunk['choices']:
+        assert isinstance(choice, dict), "Each choice should be a dictionary."
+
+        assert "index" in choice, "Each choice should have 'index'."
+        assert isinstance(choice['index'], int), "'index' should be an integer."
+
+        assert "delta" in choice, "Each choice should have 'delta'."
+        assert isinstance(choice['delta'], dict), "'delta' should be a dictionary."
+
+        assert "content" in choice['delta'], "'delta' should have 'content'."
+        assert isinstance(choice['delta']['content'], str), "'content' should be a string."
+
+        assert "finish_reason" in choice, "Each choice should have 'finish_reason'."
+        assert (choice['finish_reason'] is None) or isinstance(choice['finish_reason'], str), "'finish_reason' should be None or a string."
+
+last_openai_chunk_example = {
+    "id": "chatcmpl-7zSKLBVXnX9dwgRuDYVqVVDsgh2yp",
+    "object": "chat.completion.chunk",
+    "created": 1694881253,
+    "model": "gpt-4-0613",
+    "choices": [
+        {
+            "index": 0,
+            "delta": {},
+            "finish_reason": "stop"
+        }
+    ]
+}
+
+def validate_last_format(chunk):
+    assert isinstance(chunk, dict), "Chunk should be a dictionary."
+    assert "id" in chunk, "Chunk should have an 'id'."
+    assert isinstance(chunk['id'], str), "'id' should be a string."
+
+    assert "object" in chunk, "Chunk should have an 'object'."
+    assert isinstance(chunk['object'], str), "'object' should be a string."
+
+    assert "created" in chunk, "Chunk should have a 'created'."
+    assert isinstance(chunk['created'], int), "'created' should be an integer."
+
+    assert "model" in chunk, "Chunk should have a 'model'."
+    assert isinstance(chunk['model'], str), "'model' should be a string."
+
+    assert "choices" in chunk, "Chunk should have 'choices'."
+    assert isinstance(chunk['choices'], list), "'choices' should be a list."
+
+    for choice in chunk['choices']:
+        assert isinstance(choice, dict), "Each choice should be a dictionary."
+
+        assert "index" in choice, "Each choice should have 'index'."
+        assert isinstance(choice['index'], int), "'index' should be an integer."
+
+        assert "delta" in choice, "Each choice should have 'delta'."
+        assert isinstance(choice['delta'], dict), "'delta' should be a dictionary."
+
+        assert "finish_reason" in choice, "Each choice should have 'finish_reason'."
+        assert isinstance(choice['finish_reason'], str), "'finish_reason' should be a string."
+
+def streaming_format_tests(idx, chunk):
+    extracted_chunk = ""
+    finished = False
+    if idx == 0:  # ensure role assistant is set
+        validate_first_format(chunk=chunk)
+        role = chunk["choices"][0]["delta"]["role"]
+        assert role == "assistant"
+    elif idx == 1:  # second chunk
+        validate_second_format(chunk=chunk)
+    if idx != 0:  # ensure no role
+        if "role" in chunk["choices"][0]["delta"]:
+            raise Exception("role should not exist after first chunk")
+    if chunk["choices"][0]["finish_reason"]:  # ensure finish reason is only in last chunk
+        validate_last_format(chunk=chunk)
+        finished = True
+    if "content" in chunk["choices"][0]["delta"]:
+        extracted_chunk = chunk["choices"][0]["delta"]["content"]
+    return extracted_chunk, finished
+
 def test_completion_cohere_stream():
     try:
         messages = [
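For orientation, here is a minimal, self-contained sketch of the consumption pattern the helpers above enforce, driven by hand-written chunks rather than a live call. The fake_stream dicts below are illustrative stand-ins (the id value is made up), not captured API output, and streaming_format_tests is the helper from the hunk above:

fake_stream = [
    {"id": "chatcmpl-abc123", "object": "chat.completion.chunk", "created": 1694881253,
     "model": "gpt-4-0613",
     "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}, "finish_reason": None}]},
    {"id": "chatcmpl-abc123", "object": "chat.completion.chunk", "created": 1694881253,
     "model": "gpt-4-0613",
     "choices": [{"index": 0, "delta": {"content": "Hello"}, "finish_reason": None}]},
    {"id": "chatcmpl-abc123", "object": "chat.completion.chunk", "created": 1694881253,
     "model": "gpt-4-0613",
     "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]},
]

complete_response = ""
for idx, chunk in enumerate(fake_stream):
    extracted, finished = streaming_format_tests(idx, chunk)  # raises AssertionError on format drift
    if finished:
        break
    complete_response += extracted
assert complete_response == "Hello"
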
@@ -38,36 +202,18 @@ def test_completion_cohere_stream():
         )
         complete_response = ""
         # Add any assertions here to check the response
-        for chunk in response:
-            print(f"chunk: {chunk}")
-            complete_response += chunk["choices"][0]["delta"]["content"]
-        if complete_response == "":
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+        if complete_response.strip() == "":
             raise Exception("Empty response received")
         print(f"completion_response: {complete_response}")
     except KeyError as e:
         pass
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-# test on baseten completion call
-# try:
-#     response = completion(
-#         model="baseten/RqgAEn0", messages=messages, logger_fn=logger_fn
-#     )
-#     print(f"response: {response}")
-#     complete_response = ""
-#     start_time = time.time()
-#     for chunk in response:
-#         chunk_time = time.time()
-#         print(f"time since initial request: {chunk_time - start_time:.5f}")
-#         print(chunk["choices"][0]["delta"])
-#         complete_response += chunk["choices"][0]["delta"]["content"]
-#     if complete_response == "":
-#         raise Exception("Empty response received")
-#     print(f"complete response: {complete_response}")
-# except:
-#     print(f"error occurred: {traceback.format_exc()}")
-#     pass
 # test_completion_cohere_stream()
 
 # test on openai completion call
 def test_openai_text_completion_call():
@@ -77,16 +223,17 @@ def test_openai_text_completion_call():
         )
         complete_response = ""
-        start_time = time.time()
-        for chunk in response:
-            chunk_time = time.time()
-            print(f"chunk: {chunk}")
-            if "content" in chunk["choices"][0]["delta"]:
-                complete_response += chunk["choices"][0]["delta"]["content"]
-        if complete_response == "":
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+        if complete_response.strip() == "":
             raise Exception("Empty response received")
     except:
-        print(f"error occurred: {traceback.format_exc()}")
-        pass
+        pytest.fail(f"error occurred: {traceback.format_exc()}")
 
 test_openai_text_completion_call()
 
 # # test on ai21 completion call
 def ai21_completion_call():
@@ -97,18 +244,18 @@ def ai21_completion_call():
         print(f"response: {response}")
         complete_response = ""
-        start_time = time.time()
-        for chunk in response:
-            chunk_time = time.time()
-            print(f"time since initial request: {chunk_time - start_time:.5f}")
-            print(chunk)
-            if "content" in chunk["choices"][0]["delta"]:
-                complete_response += chunk["choices"][0]["delta"]["content"]
-        if complete_response == "":
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+        if complete_response.strip() == "":
             raise Exception("Empty response received")
         print(f"completion_response: {complete_response}")
     except:
-        print(f"error occurred: {traceback.format_exc()}")
-        pass
+        pytest.fail(f"error occurred: {traceback.format_exc()}")
 
 # ai21_completion_call()
 # test on openai completion call
 def test_openai_chat_completion_call():
     try:
@@ -117,107 +264,20 @@ def test_openai_chat_completion_call():
         )
         complete_response = ""
-        start_time = time.time()
-        for chunk in response:
-            print(chunk)
-            if chunk["choices"][0]["finish_reason"]:
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
                 break
-            # if chunk["choices"][0]["delta"]["role"] != "assistant":
-            #     raise Exception("invalid role")
-            if "content" in chunk["choices"][0]["delta"]:
-                complete_response += chunk["choices"][0]["delta"]["content"]
+            complete_response += chunk
             # print(f'complete_chunk: {complete_response}')
         if complete_response.strip() == "":
             raise Exception("Empty response received")
         print(f"complete response: {complete_response}")
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass
 
 test_openai_chat_completion_call()
 
-async def completion_call():
-    try:
-        response = completion(
-            model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn
-        )
-        print(f"response: {response}")
-        complete_response = ""
-        start_time = time.time()
-        # Change for loop to async for loop
-        async for chunk in response:
-            chunk_time = time.time()
-            print(f"time since initial request: {chunk_time - start_time:.5f}")
-            print(chunk["choices"][0]["delta"])
-            if "content" in chunk["choices"][0]["delta"]:
-                complete_response += chunk["choices"][0]["delta"]["content"]
-        if complete_response == "":
-            raise Exception("Empty response received")
-    except:
-        print(f"error occurred: {traceback.format_exc()}")
-        pass
-
-# asyncio.run(completion_call())
-
-# # test on azure completion call
-# try:
-#     response = completion(
-#         model="azure/chatgpt-test", messages=messages, stream=True, logger_fn=logger_fn
-#     )
-#     response = ""
-#     start_time = time.time()
-#     for chunk in response:
-#         chunk_time = time.time()
-#         print(f"time since initial request: {chunk_time - start_time:.2f}")
-#         print(chunk["choices"][0]["delta"])
-#         response += chunk["choices"][0]["delta"]
-#     if response == "":
-#         raise Exception("Empty response received")
-# except:
-#     print(f"error occurred: {traceback.format_exc()}")
-#     pass
-
-
-# # test on huggingface completion call
-# try:
-#     start_time = time.time()
-#     response = completion(
-#         model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn
-#     )
-#     complete_response = ""
-#     for chunk in response:
-#         chunk_time = time.time()
-#         print(f"time since initial request: {chunk_time - start_time:.2f}")
-#         print(chunk["choices"][0]["delta"])
-#         complete_response += chunk["choices"][0]["delta"]["content"] if len(chunk["choices"][0]["delta"].keys()) > 0 else ""
-#     if complete_response == "":
-#         raise Exception("Empty response received")
-# except:
-#     print(f"error occurred: {traceback.format_exc()}")
-#     pass
-
-# test on together ai completion call - replit-code-3b
-def test_together_ai_completion_call_replit():
-    try:
-        start_time = time.time()
-        response = completion(
-            model="Replit-Code-3B", messages=messages, logger_fn=logger_fn, stream=True
-        )
-        complete_response = ""
-        print(f"returned response object: {response}")
-        for chunk in response:
-            chunk_time = time.time()
-            print(f"time since initial request: {chunk_time - start_time:.2f}")
-            print(chunk["choices"][0]["delta"])
-            complete_response += (
-                chunk["choices"][0]["delta"]["content"]
-                if len(chunk["choices"][0]["delta"].keys()) > 0
-                else ""
-            )
-        if complete_response == "":
-            raise Exception("Empty response received")
-    except KeyError as e:
-        pass
-    except:
-        print(f"error occurred: {traceback.format_exc()}")
-        pass
 # test_openai_chat_completion_call()
 
 # # test on together ai completion call - starcoder
 def test_together_ai_completion_call_starcoder():
@@ -231,23 +291,18 @@ def test_together_ai_completion_call_starcoder():
         )
         complete_response = ""
         print(f"returned response object: {response}")
-        for chunk in response:
-            chunk_time = time.time()
-            complete_response += (
-                chunk["choices"][0]["delta"]["content"]
-                if len(chunk["choices"][0]["delta"].keys()) > 0
-                else ""
-            )
-            if len(complete_response) > 0:
-                print(complete_response)
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
         if complete_response == "":
             raise Exception("Empty response received")
-    except KeyError as e:
-        pass
+        print(f"complete response: {complete_response}")
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass
 
 # test_together_ai_completion_call_starcoder()
 # test on aleph alpha completion call - commented out as it's expensive to run this on circle ci for every build
 # def test_aleph_alpha_call():
 #     try:
@@ -286,13 +341,43 @@ async def ai21_async_completion_call():
         complete_response = ""
         start_time = time.time()
         # Change for loop to async for loop
+        idx = 0
         async for chunk in response:
-            chunk_time = time.time()
-            print(f"time since initial request: {chunk_time - start_time:.5f}")
-            print(chunk["choices"][0]["delta"])
-            complete_response += chunk["choices"][0]["delta"]["content"]
-        if complete_response == "":
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+            idx += 1
+        if complete_response.strip() == "":
             raise Exception("Empty response received")
+        print(f"complete response: {complete_response}")
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass
 
 # asyncio.run(ai21_async_completion_call())
 
+async def completion_call():
+    try:
+        response = completion(
+            model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn
+        )
+        print(f"response: {response}")
+        complete_response = ""
+        start_time = time.time()
+        # Change for loop to async for loop
+        idx = 0
+        async for chunk in response:
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+            idx += 1
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+        print(f"complete response: {complete_response}")
+    except:
+        print(f"error occurred: {traceback.format_exc()}")
+        pass
+
+# asyncio.run(completion_call())
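
As a closing illustration, the async variant of the same pattern: async for cannot be wrapped in enumerate(), which is why the tests above track idx by hand. A self-contained sketch, assuming streaming_format_tests from the diff and the fake_stream chunks from the earlier sketch are in scope:

import asyncio

async def fake_async_stream():
    # Stand-in for a streaming response object; yields the hand-written chunks.
    for chunk in fake_stream:
        yield chunk

async def consume():
    complete_response = ""
    idx = 0  # tracked manually, since enumerate() does not work with async for
    async for chunk in fake_async_stream():
        extracted, finished = streaming_format_tests(idx, chunk)
        if finished:
            break
        complete_response += extracted
        idx += 1
    return complete_response

assert asyncio.run(consume()) == "Hello"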