forked from phoenix/litellm-mirror
fix(utils.py): fix
This commit is contained in:
parent 54dacfdf61
commit 43af5575c8
2 changed files with 38 additions and 16 deletions
@@ -1997,20 +1997,42 @@ def test_openai_chat_completion_complete_response_call():
     "model",
     ["gpt-3.5-turbo", "azure/chatgpt-v-2"],
 )
-def test_openai_stream_options_call(model):
+@pytest.mark.parametrize(
+    "sync",
+    [True, False],
+)
+@pytest.mark.asyncio
+async def test_openai_stream_options_call(model, sync):
     litellm.set_verbose = False
-    response = litellm.completion(
-        model=model,
-        messages=[{"role": "system", "content": "say GM - we're going to make it "}],
-        stream=True,
-        stream_options={"include_usage": True},
-        max_tokens=10,
-    )
     usage = None
     chunks = []
-    for chunk in response:
-        print("chunk: ", chunk)
-        chunks.append(chunk)
+    if sync:
+        response = litellm.completion(
+            model=model,
+            messages=[
+                {"role": "system", "content": "say GM - we're going to make it "}
+            ],
+            stream=True,
+            stream_options={"include_usage": True},
+            max_tokens=10,
+        )
+        for chunk in response:
+            print("chunk: ", chunk)
+            chunks.append(chunk)
+    else:
+        response = await litellm.acompletion(
+            model=model,
+            messages=[
+                {"role": "system", "content": "say GM - we're going to make it "}
+            ],
+            stream=True,
+            stream_options={"include_usage": True},
+            max_tokens=10,
+        )
+
+        async for chunk in response:
+            print("chunk: ", chunk)
+            chunks.append(chunk)
+
     last_chunk = chunks[-1]
     print("last chunk: ", last_chunk)
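A minimal, self-contained sketch (not part of the diff) of what the parametrized test above exercises, assuming the OpenAI-style streaming behavior where only the final chunk carries a usage block when stream_options={"include_usage": True} is set; the collect_chunks helper name is hypothetical:

import asyncio

import litellm


async def collect_chunks(model: str, sync: bool) -> list:
    # Hypothetical helper: stream a short completion and return every chunk,
    # going through litellm.completion or litellm.acompletion depending on `sync`.
    kwargs = dict(
        model=model,
        messages=[{"role": "system", "content": "say GM - we're going to make it "}],
        stream=True,
        stream_options={"include_usage": True},
        max_tokens=10,
    )
    chunks = []
    if sync:
        for chunk in litellm.completion(**kwargs):
            chunks.append(chunk)
    else:
        response = await litellm.acompletion(**kwargs)
        async for chunk in response:
            chunks.append(chunk)
    return chunks


if __name__ == "__main__":
    collected = asyncio.run(collect_chunks("gpt-3.5-turbo", sync=False))
    # The last chunk is the one the test inspects for usage information.
    print("last chunk: ", collected[-1])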
@@ -11914,7 +11914,7 @@ class CustomStreamWrapper:
                         input=self.response_uptil_now, model=self.model
                     )
                     print_verbose(f"final returned processed chunk: {processed_chunk}")
-                    self.chunks.append(response)
+                    self.chunks.append(processed_chunk)
                     return processed_chunk
                 raise StopAsyncIteration
             else: # temporary patch for non-aiohttp async calls
@@ -11954,7 +11954,7 @@ class CustomStreamWrapper:
                         input=self.response_uptil_now, model=self.model
                     )
                     # RETURN RESULT
-                    self.chunks.append(response)
+                    self.chunks.append(processed_chunk)
                     return processed_chunk
         except StopAsyncIteration:
            if self.sent_last_chunk == True:
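The two one-line changes above restore the same invariant in both code paths: the list used for end-of-stream aggregation has to hold the processed chunks that are actually returned to the caller. A toy illustration with hypothetical names, not code from the repository:

processed_chunks = []


def yield_chunk(raw_chunk, postprocess):
    # Normalize the provider payload, record exactly what the caller receives,
    # and return it; the recorded list is what gets aggregated once the stream ends.
    processed = postprocess(raw_chunk)
    processed_chunks.append(processed)
    return processed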
@@ -11965,17 +11965,17 @@ class CustomStreamWrapper:
             ):
                 # send the final chunk with stream options
                 complete_streaming_response = litellm.stream_chunk_builder(
-                    chunks=self.chunks
+                    chunks=self.chunks, messages=self.messages
                 )
                 response = self.model_response_creator()
                 response.usage = complete_streaming_response.usage
                 ## LOGGING
                 threading.Thread(
-                    target=self.logging_obj.success_handler, args=(processed_chunk,)
+                    target=self.logging_obj.success_handler, args=(response,)
                 ).start() # log response
                 asyncio.create_task(
                     self.logging_obj.async_success_handler(
-                        processed_chunk,
+                        response,
                     )
                 )
                 self.sent_stream_usage = True
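For context on the change above, a short sketch of the public helper the wrapper leans on: litellm.stream_chunk_builder rebuilds a complete response (including usage) from the chunks collected while streaming, and passing messages lets it account for the prompt side as well; that motivation is inferred from the code, not stated in the commit message.

import litellm

messages = [{"role": "user", "content": "say GM - we're going to make it "}]

chunks = []
for chunk in litellm.completion(
    model="gpt-3.5-turbo",
    messages=messages,
    stream=True,
    max_tokens=10,
):
    chunks.append(chunk)

# Rebuild a single, non-streaming-style response from the streamed chunks.
rebuilt = litellm.stream_chunk_builder(chunks, messages=messages)
print(rebuilt.usage)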