Merge pull request #3812 from afbarbaro/main

Fix issue with delta being None when Deferred / Async Content Filter is enabled on Azure OpenAI
commit 391a31c0ce
Krish Dholakia 2024-05-24 10:05:08 -07:00 committed by GitHub
2 changed files with 261 additions and 5 deletions
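Background on the failure mode: with the Deferred / Async Content Filter enabled, the Azure stream interleaves annotation-only chunks whose choices carry content_filter_offsets / content_filter_results but no delta, so the parsed delta comes back as None and the previous chunk handling raised on attribute access. Below is a minimal sketch of the problem and of the guard pattern this fix applies; the annotation_chunk payload is illustrative, trimmed from the test fixtures added in this PR.

annotation_chunk = {
    "id": "",
    "choices": [
        {
            "index": 0,
            "finish_reason": None,
            # annotation-only choice: filter results, but no "delta" key at all
            "content_filter_results": {
                "hate": {"filtered": False, "severity": "safe"},
            },
        }
    ],
}

choice = annotation_chunk["choices"][0]

# Pre-fix: once this is parsed into a typed chunk, choice.delta is None and
# calling .get("content", "") on None raises AttributeError mid-stream.
# Post-fix guard, mirroring the CustomStreamWrapper changes below:
delta = choice.get("delta")
text = "" if delta is None else delta.get("content", "")
assert text == ""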


@@ -235,6 +235,259 @@ def test_completion_azure_stream_special_char():
         assert len(response_str) > 0
 
 
+def test_completion_azure_stream_content_filter_no_delta():
+    """
+    Tests streaming from Azure when the chunks have no delta because they represent the filtered content
+    """
+    try:
+        chunks = [
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": "",
+                            "role": "assistant"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": "This"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": " is"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": " a"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": " dummy"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": " response"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "",
+                "choices": [
+                    {
+                        "finish_reason": None,
+                        "index": 0,
+                        "content_filter_offsets": {
+                            "check_offset": 35159,
+                            "start_offset": 35159,
+                            "end_offset": 36150
+                        },
+                        "content_filter_results": {
+                            "hate": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "self_harm": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "sexual": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "violence": {
+                                "filtered": False,
+                                "severity": "safe"
+                            }
+                        }
+                    }
+                ],
+                "created": 0,
+                "model": "",
+                "object": ""
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": "."
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {},
+                        "finish_reason": "stop",
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "",
+                "choices": [
+                    {
+                        "finish_reason": None,
+                        "index": 0,
+                        "content_filter_offsets": {
+                            "check_offset": 36150,
+                            "start_offset": 36060,
+                            "end_offset": 37029
+                        },
+                        "content_filter_results": {
+                            "hate": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "self_harm": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "sexual": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "violence": {
+                                "filtered": False,
+                                "severity": "safe"
+                            }
+                        }
+                    }
+                ],
+                "created": 0,
+                "model": "",
+                "object": ""
+            }
+        ]
+
+        chunk_list = []
+        for chunk in chunks:
+            new_chunk = litellm.ModelResponse(stream=True, id=chunk["id"])
+            if "choices" in chunk and isinstance(chunk["choices"], list):
+                new_choices = []
+                for choice in chunk["choices"]:
+                    if isinstance(choice, litellm.utils.StreamingChoices):
+                        _new_choice = choice
+                    elif isinstance(choice, dict):
+                        _new_choice = litellm.utils.StreamingChoices(**choice)
+                    new_choices.append(_new_choice)
+                new_chunk.choices = new_choices
+            chunk_list.append(new_chunk)
+
+        completion_stream = ModelResponseListIterator(model_responses=chunk_list)
+
+        litellm.set_verbose = True
+
+        response = litellm.CustomStreamWrapper(
+            completion_stream=completion_stream,
+            model="gpt-4-0613",
+            custom_llm_provider="cached_response",
+            logging_obj=litellm.Logging(
+                model="gpt-4-0613",
+                messages=[{"role": "user", "content": "Hey"}],
+                stream=True,
+                call_type="completion",
+                start_time=time.time(),
+                litellm_call_id="12345",
+                function_id="1245",
+            ),
+        )
+
+        complete_response = ""
+        for idx, chunk in enumerate(response):
+            # annotation-only chunks have no delta, so guard before reading content
+            delta = chunk.choices[0].delta
+            content = delta.content if delta else None
+            complete_response += content or ""
+            if chunk.choices[0].finish_reason is not None:
+                break
+        assert len(complete_response) > 0
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {str(e)}")
+
+
 def test_completion_cohere_stream_bad_key():
     try:
         litellm.cache = None
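
To exercise the new test in isolation, something like the following should work; the file path is an assumption based on the litellm test layout at the time of this PR:

pytest litellm/tests/test_streaming.py -k test_completion_azure_stream_content_filter_no_delta -s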


@@ -10646,7 +10646,8 @@ class CustomStreamWrapper:
                 data_json = json.loads(chunk[5:])  # chunk.startswith("data:"):
                 try:
                     if len(data_json["choices"]) > 0:
-                        text = data_json["choices"][0]["delta"].get("content", "")
+                        delta = data_json["choices"][0]["delta"]
+                        text = "" if delta is None else delta.get("content", "")
                         if data_json["choices"][0].get("finish_reason", None):
                             is_finished = True
                             finish_reason = data_json["choices"][0]["finish_reason"]
@@ -11414,12 +11415,14 @@ class CustomStreamWrapper:
                 model_response.id = original_chunk.id
                 self.response_id = original_chunk.id
                 if len(original_chunk.choices) > 0:
+                    delta = original_chunk.choices[0].delta
                     if (
-                        original_chunk.choices[0].delta.function_call is not None
-                        or original_chunk.choices[0].delta.tool_calls is not None
+                        delta is not None and (
+                            delta.function_call is not None
+                            or delta.tool_calls is not None
+                        )
                     ):
                         try:
-                            delta = original_chunk.choices[0].delta
                             model_response.system_fingerprint = (
                                 original_chunk.system_fingerprint
                             )
@@ -11478,7 +11481,7 @@
                             model_response.choices[0].delta = Delta()
                         else:
                             try:
-                                delta = dict(original_chunk.choices[0].delta)
+                                delta = dict() if original_chunk.choices[0].delta is None else dict(original_chunk.choices[0].delta)
                                 print_verbose(f"original delta: {delta}")
                                 model_response.choices[0].delta = Delta(**delta)
                                 print_verbose(