Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 03:04:13 +00:00
(Bug fix) - Using include_usage for /completions requests + unit testing (#8484)

* pass stream options (#8419)
* test_completion_streaming_usage_metrics
* test_text_completion_include_usage

Co-authored-by: Kaushik Deka <55996465+Kaushikdkrikhanu@users.noreply.github.com>
This commit is contained in:
parent 2a79c95af7
commit 152b44075b
3 changed files with 66 additions and 0 deletions
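
In caller terms, the fix means a streamed /completions request made through litellm now forwards stream_options to the underlying handler, so the provider's final usage chunk actually reaches the caller. A minimal sketch of that flow, mirroring the new tests below (the model name and prompt are just the values the tests use):

import asyncio

import litellm


async def main():
    # Stream a text completion and ask the provider to append a usage chunk.
    # Before this fix, stream_options was not forwarded for /completions.
    response = await litellm.atext_completion(
        model="gpt-3.5-turbo",
        prompt="Hello, world!",
        stream=True,
        stream_options={"include_usage": True},
    )

    last_chunk = None
    async for chunk in response:
        last_chunk = chunk

    # With include_usage honored, the last chunk carries prompt/completion/total token counts.
    print(last_chunk.usage)


asyncio.run(main())
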
@@ -3947,6 +3947,7 @@ async def atext_completion
                 ),
                 model=model,
                 custom_llm_provider=custom_llm_provider,
+                stream_options=kwargs.get('stream_options'),
             )
         else:
             ## OpenAI / Azure Text Completion Returns here

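Because include_usage adds a trailing chunk whose usage field is populated (and whose choices list is typically empty under OpenAI-style streaming), consumers usually want to separate generated text from that final accounting chunk. A hedged sketch of such a helper; collect_text_and_usage is a hypothetical name, not part of this diff:

from typing import Any, Tuple


async def collect_text_and_usage(response: Any) -> Tuple[str, Any]:
    # Drain an OpenAI-style text-completion stream, keeping generated text
    # separate from the usage block that arrives on the final chunk when
    # stream_options={"include_usage": True} is requested.
    text_parts = []
    usage = None
    async for chunk in response:
        if chunk.choices:  # ordinary content chunks
            text_parts.append(chunk.choices[0].text or "")
        if getattr(chunk, "usage", None) is not None:  # trailing usage chunk
            usage = chunk.usage
    return "".join(text_parts), usage
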
@@ -139,3 +139,38 @@ def test_convert_chat_to_text_completion_multiple_choices():
         completion_tokens_details=None,
         prompt_tokens_details=None,
     )
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("sync_mode", [True, False])
+async def test_text_completion_include_usage(sync_mode):
+    """Test text completion with include_usage"""
+    last_chunk = None
+    if sync_mode:
+        response = await litellm.atext_completion(
+            model="gpt-3.5-turbo",
+            prompt="Hello, world!",
+            stream=True,
+            stream_options={"include_usage": True},
+        )
+
+        async for chunk in response:
+            print(chunk)
+            last_chunk = chunk
+    else:
+        response = litellm.text_completion(
+            model="gpt-3.5-turbo",
+            prompt="Hello, world!",
+            stream=True,
+            stream_options={"include_usage": True},
+        )
+
+        for chunk in response:
+            print(chunk)
+            last_chunk = chunk
+
+    assert last_chunk is not None
+    assert last_chunk.usage is not None
+    assert last_chunk.usage.prompt_tokens > 0
+    assert last_chunk.usage.completion_tokens > 0
+    assert last_chunk.usage.total_tokens > 0

@@ -378,6 +378,36 @@ async def test_chat_completion_streaming():
     print(f"response_str: {response_str}")
 
 
+@pytest.mark.asyncio
+async def test_completion_streaming_usage_metrics():
+    """
+    [PROD Test] Ensures usage metrics are returned correctly when `include_usage` is set to `True`
+    """
+    client = AsyncOpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
+
+    response = await client.completions.create(
+        model="gpt-instruct",
+        prompt="hey",
+        stream=True,
+        stream_options={"include_usage": True},
+        max_tokens=4,
+        temperature=0.00000001,
+    )
+
+    last_chunk = None
+    async for chunk in response:
+        print("chunk", chunk)
+        last_chunk = chunk
+
+    assert last_chunk is not None, "No chunks were received"
+    assert last_chunk.usage is not None, "Usage information was not received"
+    assert last_chunk.usage.prompt_tokens > 0, "Prompt tokens should be greater than 0"
+    assert (
+        last_chunk.usage.completion_tokens > 0
+    ), "Completion tokens should be greater than 0"
+    assert last_chunk.usage.total_tokens > 0, "Total tokens should be greater than 0"
+
+
 @pytest.mark.asyncio
 async def test_chat_completion_anthropic_structured_output():
     """

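Outside pytest, the same proxy route can be exercised directly with the OpenAI SDK, here with the synchronous client. The endpoint, key, and model alias are the values used by the test above; a running litellm proxy that serves "gpt-instruct" on port 4000 is assumed and is not part of this diff:

from openai import OpenAI

# Values taken from test_completion_streaming_usage_metrics; the proxy and
# its "gpt-instruct" model alias must already exist (assumption).
client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.completions.create(
    model="gpt-instruct",
    prompt="hey",
    stream=True,
    stream_options={"include_usage": True},
    max_tokens=4,
)

last_chunk = None
for chunk in response:
    last_chunk = chunk

# With the fix deployed on the proxy, the final chunk carries the usage block.
print(last_chunk.usage)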