forked from phoenix/litellm-mirror
fix(main.py): fix async text completion streaming + add new tests
This commit is contained in:
parent
2b8e2bd937
commit
6f2734100f
2 changed files with 41 additions and 14 deletions
|
@ -2472,22 +2472,22 @@ async def atext_completion(*args, **kwargs):
|
|||
or custom_llm_provider == "ollama"
|
||||
or custom_llm_provider == "vertex_ai"
|
||||
): # currently implemented aiohttp calls for just azure and openai, soon all.
|
||||
if kwargs.get("stream", False):
|
||||
response = text_completion(*args, **kwargs)
|
||||
else:
|
||||
# Await normally
|
||||
response = await loop.run_in_executor(None, func_with_context)
|
||||
if asyncio.iscoroutine(response):
|
||||
response = await response
|
||||
# Await normally
|
||||
response = await loop.run_in_executor(None, func_with_context)
|
||||
if asyncio.iscoroutine(response):
|
||||
response = await response
|
||||
else:
|
||||
# Call the synchronous function using run_in_executor
|
||||
response = await loop.run_in_executor(None, func_with_context)
|
||||
if kwargs.get("stream", False): # return an async generator
|
||||
return _async_streaming(
|
||||
response=response,
|
||||
if kwargs.get("stream", False) == True: # return an async generator
|
||||
return TextCompletionStreamWrapper(
|
||||
completion_stream=_async_streaming(
|
||||
response=response,
|
||||
model=model,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
args=args,
|
||||
),
|
||||
model=model,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
args=args,
|
||||
)
|
||||
else:
|
||||
return response
|
||||
|
@ -2691,11 +2691,11 @@ def text_completion(
|
|||
**kwargs,
|
||||
**optional_params,
|
||||
)
|
||||
if kwargs.get("acompletion", False) == True:
|
||||
return response
|
||||
if stream == True or kwargs.get("stream", False) == True:
|
||||
response = TextCompletionStreamWrapper(completion_stream=response, model=model)
|
||||
return response
|
||||
if kwargs.get("acompletion", False) == True:
|
||||
return response
|
||||
transformed_logprobs = None
|
||||
# only supported for TGI models
|
||||
try:
|
||||
|
|
|
@ -215,3 +215,30 @@ def test_get_response_non_openai_streaming():
|
|||
|
||||
|
||||
# test_get_response_non_openai_streaming()
|
||||
|
||||
|
||||
async def test_get_response():
    """Verify that a streamed ``litellm.atext_completion`` yields one finish reason.

    Requests a streaming text completion, iterates the returned async
    stream, and counts the chunks whose first choice carries a non-``None``
    ``finish_reason``. Exactly one such chunk is expected.

    NOTE(review): this is a coroutine test; it only executes under pytest if
    an asyncio plugin/mode is configured for the project -- confirm.
    """
    try:
        response = await litellm.atext_completion(
            model="gpt-3.5-turbo",
            prompt="good morning",
            stream=True,
            max_tokens=10,
        )
        print(f"response: {response}")

        num_finish_reason = 0
        async for chunk in response:
            print(chunk)
            finish_reason = chunk["choices"][0].get("finish_reason")
            if finish_reason is not None:
                num_finish_reason += 1
                print("finish_reason", finish_reason)
    except Exception as e:
        # Only errors raised while calling or consuming the stream land here.
        pytest.fail(f"GOT exception for gpt-3.5-turbo in streaming: {e}")

    # Kept outside the try block so a wrong count surfaces as a plain
    # assertion failure instead of being re-reported as a call exception.
    assert (
        num_finish_reason == 1
    ), f"expected only one finish reason. Got {num_finish_reason}"
|
||||
|
||||
|
||||
# asyncio.run(test_get_response())
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue