fix(utils.py): fix sagemaker async logging for sync streaming

https://github.com/BerriAI/litellm/issues/1592
Author: Krrish Dholakia
Date:   2024-01-25 12:49:45 -08:00
Commit: 09ec6d6458 (parent 39d5407e67)

10 changed files with 247 additions and 64 deletions
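
For context on the fix named in the title: litellm's synchronous streaming path was not firing the async spend-logging callback, so sagemaker sync-stream calls never recorded spend (issue #1592). Below is a minimal sketch of the general pattern, assuming a coroutine logger; the names (`async_log_success`, `sync_stream`) are hypothetical and this is not litellm's actual internal code.

import asyncio

async def async_log_success(payload: dict) -> None:
    # Stand-in for an async logger (e.g. a DB spend update).
    await asyncio.sleep(0)
    print(f"logged: {payload}")

def sync_stream(chunks: list):
    # Yield chunks synchronously; once the stream is exhausted (the point
    # where spend can finally be computed), dispatch the async logger.
    for chunk in chunks:
        yield chunk
    try:
        # A loop is already running in this thread: schedule without blocking.
        asyncio.get_running_loop().create_task(async_log_success({"chunks": len(chunks)}))
    except RuntimeError:
        # Plain sync caller, no running loop: run the coroutine to completion.
        asyncio.run(async_log_success({"chunks": len(chunks)}))

for token in sync_stream(["a", "b", "c"]):
    print(token, end="")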


@@ -13,17 +13,21 @@ sys.path.insert(
 import litellm


-async def generate_key(session, i, budget=None, budget_duration=None):
+async def generate_key(
+    session, i, budget=None, budget_duration=None, models=["azure-models", "gpt-4"]
+):
     url = "http://0.0.0.0:4000/key/generate"
     headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
     data = {
-        "models": ["azure-models", "gpt-4"],
+        "models": models,
         "aliases": {"mistral-7b": "gpt-3.5-turbo"},
         "duration": None,
         "max_budget": budget,
         "budget_duration": budget_duration,
     }
+    print(f"data: {data}")
     async with session.post(url, headers=headers, json=data) as response:
         status = response.status
         response_text = await response.text()
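
The new `models` parameter defaults to the old hard-coded list, so existing callers are unchanged; the sagemaker test added below passes `models=[]` to mint a key without per-model restrictions. Hypothetical usage illustrating both forms:

# Old behavior preserved: key restricted to ["azure-models", "gpt-4"].
key_gen = await generate_key(session=session, i=0)

# New: an empty list creates a key with no model restriction,
# which the sagemaker streaming test below relies on.
key_gen = await generate_key(session=session, i=0, models=[])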
@@ -293,7 +297,7 @@ async def test_key_info_spend_values():
         rounded_response_cost = round(response_cost, 8)
         rounded_key_info_spend = round(key_info["info"]["spend"], 8)
         assert rounded_response_cost == rounded_key_info_spend
-        ## streaming
+        ## streaming - azure
         key_gen = await generate_key(session=session, i=0)
         new_key = key_gen["key"]
         prompt_tokens, completion_tokens = await chat_completion_streaming(
@@ -318,6 +322,41 @@ async def test_key_info_spend_values():
         assert rounded_response_cost == rounded_key_info_spend


+@pytest.mark.asyncio
+async def test_key_info_spend_values_sagemaker():
+    """
+    Tests the sync streaming loop to ensure spend is correctly calculated.
+    - create key
+    - make completion call
+    - assert cost is expected value
+    """
+    async with aiohttp.ClientSession() as session:
+        ## streaming - sagemaker
+        key_gen = await generate_key(session=session, i=0, models=[])
+        new_key = key_gen["key"]
+        prompt_tokens, completion_tokens = await chat_completion_streaming(
+            session=session, key=new_key, model="sagemaker-completion-model"
+        )
+        # print(f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}")
+        # prompt_cost, completion_cost = litellm.cost_per_token(
+        #     model="azure/gpt-35-turbo",
+        #     prompt_tokens=prompt_tokens,
+        #     completion_tokens=completion_tokens,
+        # )
+        # response_cost = prompt_cost + completion_cost
+        await asyncio.sleep(5)  # allow db log to be updated
+        key_info = await get_key_info(
+            session=session, get_key=new_key, call_key=new_key
+        )
+        # print(
+        #     f"response_cost: {response_cost}; key_info spend: {key_info['info']['spend']}"
+        # )
+        # rounded_response_cost = round(response_cost, 8)
+        rounded_key_info_spend = round(key_info["info"]["spend"], 8)
+        assert rounded_key_info_spend > 0
+        # assert rounded_response_cost == rounded_key_info_spend
+
+
 @pytest.mark.asyncio
 async def test_key_with_budgets():
     """