forked from phoenix/litellm-mirror
fix(utils.py): fix sagemaker async logging for sync streaming
https://github.com/BerriAI/litellm/issues/1592
This commit is contained in:
parent
39d5407e67
commit
09ec6d6458
10 changed files with 247 additions and 64 deletions
|
@ -13,17 +13,21 @@ sys.path.insert(
|
|||
import litellm
|
||||
|
||||
|
||||
async def generate_key(session, i, budget=None, budget_duration=None):
|
||||
async def generate_key(
|
||||
session, i, budget=None, budget_duration=None, models=["azure-models", "gpt-4"]
|
||||
):
|
||||
url = "http://0.0.0.0:4000/key/generate"
|
||||
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
|
||||
data = {
|
||||
"models": ["azure-models", "gpt-4"],
|
||||
"models": models,
|
||||
"aliases": {"mistral-7b": "gpt-3.5-turbo"},
|
||||
"duration": None,
|
||||
"max_budget": budget,
|
||||
"budget_duration": budget_duration,
|
||||
}
|
||||
|
||||
print(f"data: {data}")
|
||||
|
||||
async with session.post(url, headers=headers, json=data) as response:
|
||||
status = response.status
|
||||
response_text = await response.text()
|
||||
|
@ -293,7 +297,7 @@ async def test_key_info_spend_values():
|
|||
rounded_response_cost = round(response_cost, 8)
|
||||
rounded_key_info_spend = round(key_info["info"]["spend"], 8)
|
||||
assert rounded_response_cost == rounded_key_info_spend
|
||||
## streaming
|
||||
## streaming - azure
|
||||
key_gen = await generate_key(session=session, i=0)
|
||||
new_key = key_gen["key"]
|
||||
prompt_tokens, completion_tokens = await chat_completion_streaming(
|
||||
|
@ -318,6 +322,41 @@ async def test_key_info_spend_values():
|
|||
assert rounded_response_cost == rounded_key_info_spend
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_key_info_spend_values_sagemaker():
|
||||
"""
|
||||
Tests the sync streaming loop to ensure spend is correctly calculated.
|
||||
- create key
|
||||
- make completion call
|
||||
- assert cost is expected value
|
||||
"""
|
||||
async with aiohttp.ClientSession() as session:
|
||||
## streaming - sagemaker
|
||||
key_gen = await generate_key(session=session, i=0, models=[])
|
||||
new_key = key_gen["key"]
|
||||
prompt_tokens, completion_tokens = await chat_completion_streaming(
|
||||
session=session, key=new_key, model="sagemaker-completion-model"
|
||||
)
|
||||
# print(f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}")
|
||||
# prompt_cost, completion_cost = litellm.cost_per_token(
|
||||
# model="azure/gpt-35-turbo",
|
||||
# prompt_tokens=prompt_tokens,
|
||||
# completion_tokens=completion_tokens,
|
||||
# )
|
||||
# response_cost = prompt_cost + completion_cost
|
||||
await asyncio.sleep(5) # allow db log to be updated
|
||||
key_info = await get_key_info(
|
||||
session=session, get_key=new_key, call_key=new_key
|
||||
)
|
||||
# print(
|
||||
# f"response_cost: {response_cost}; key_info spend: {key_info['info']['spend']}"
|
||||
# )
|
||||
# rounded_response_cost = round(response_cost, 8)
|
||||
rounded_key_info_spend = round(key_info["info"]["spend"], 8)
|
||||
assert rounded_key_info_spend > 0
|
||||
# assert rounded_response_cost == rounded_key_info_spend
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_key_with_budgets():
|
||||
"""
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue