fix(utils.py): fix

Krrish Dholakia 2024-06-04 19:41:20 -07:00
parent 54dacfdf61
commit 43af5575c8
2 changed files with 38 additions and 16 deletions

@@ -1997,20 +1997,42 @@ def test_openai_chat_completion_complete_response_call():
     "model",
     ["gpt-3.5-turbo", "azure/chatgpt-v-2"],
 )
-def test_openai_stream_options_call(model):
+@pytest.mark.parametrize(
+    "sync",
+    [True, False],
+)
+@pytest.mark.asyncio
+async def test_openai_stream_options_call(model, sync):
     litellm.set_verbose = False
+    usage = None
+    chunks = []
+    if sync:
         response = litellm.completion(
             model=model,
-        messages=[{"role": "system", "content": "say GM - we're going to make it "}],
+            messages=[
+                {"role": "system", "content": "say GM - we're going to make it "}
+            ],
             stream=True,
             stream_options={"include_usage": True},
             max_tokens=10,
         )
-    usage = None
-    chunks = []
         for chunk in response:
             print("chunk: ", chunk)
             chunks.append(chunk)
+    else:
+        response = await litellm.acompletion(
+            model=model,
+            messages=[
+                {"role": "system", "content": "say GM - we're going to make it "}
+            ],
+            stream=True,
+            stream_options={"include_usage": True},
+            max_tokens=10,
+        )
+        async for chunk in response:
+            print("chunk: ", chunk)
+            chunks.append(chunk)
     last_chunk = chunks[-1]
     print("last chunk: ", last_chunk)

@@ -11914,7 +11914,7 @@ class CustomStreamWrapper:
                         input=self.response_uptil_now, model=self.model
                     )
                     print_verbose(f"final returned processed chunk: {processed_chunk}")
-                    self.chunks.append(response)
+                    self.chunks.append(processed_chunk)
                     return processed_chunk
                 raise StopAsyncIteration
             else: # temporary patch for non-aiohttp async calls
@@ -11954,7 +11954,7 @@ class CustomStreamWrapper:
                         input=self.response_uptil_now, model=self.model
                     )
                     # RETURN RESULT
-                    self.chunks.append(response)
+                    self.chunks.append(processed_chunk)
                     return processed_chunk
         except StopAsyncIteration:
             if self.sent_last_chunk == True:
@@ -11965,17 +11965,17 @@ class CustomStreamWrapper:
                 ):
                     # send the final chunk with stream options
                     complete_streaming_response = litellm.stream_chunk_builder(
-                        chunks=self.chunks
+                        chunks=self.chunks, messages=self.messages
                     )
                     response = self.model_response_creator()
                     response.usage = complete_streaming_response.usage
                     ## LOGGING
                     threading.Thread(
-                        target=self.logging_obj.success_handler, args=(processed_chunk,)
+                        target=self.logging_obj.success_handler, args=(response,)
                     ).start() # log response
                     asyncio.create_task(
                         self.logging_obj.async_success_handler(
-                            processed_chunk,
+                            response,
                         )
                     )
                     self.sent_stream_usage = True
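
Taken together, the utils.py hunks change how the final usage chunk is produced when the stream ends: the accumulated chunks (now the processed chunks rather than the raw response) are rebuilt with litellm.stream_chunk_builder, which is additionally given the original messages so prompt tokens can be counted, and the rebuilt usage is attached to a fresh response object that is also what gets logged. A minimal standalone sketch of the same helper outside the wrapper, with a placeholder model and prompt:

import litellm

messages = [{"role": "user", "content": "say GM - we're going to make it "}]

# Stream a completion and collect every chunk.
chunks = []
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=messages,
    stream=True,
    max_tokens=10,
)
for chunk in response:
    chunks.append(chunk)

# Rebuild a complete response from the chunks; passing messages lets the builder
# count prompt tokens, mirroring the messages=self.messages added in this commit.
complete_response = litellm.stream_chunk_builder(chunks, messages=messages)
print(complete_response.usage)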