diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 093a4667a..978355568 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -897,6 +897,10 @@ def get_logging_payload(kwargs, response_obj, start_time, end_time): from pydantic import Json import uuid + verbose_proxy_logger.debug( + f"SpendTable: get_logging_payload - kwargs: {kwargs}\n\n" + ) + if kwargs == None: kwargs = {} # standardize this function to be used across, s3, dynamoDB, langfuse logging diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py index 96dfeed0f..49f091cd6 100644 --- a/litellm/tests/test_key_generate_prisma.py +++ b/litellm/tests/test_key_generate_prisma.py @@ -44,6 +44,7 @@ from litellm.proxy.proxy_server import ( generate_key_fn, spend_user_fn, spend_key_fn, + view_spend_logs, ) from litellm.proxy.utils import PrismaClient, ProxyLogging from litellm._logging import verbose_proxy_logger @@ -715,9 +716,12 @@ def test_call_with_key_over_budget(prisma_client): # update spend using track_cost callback, make 2nd request, it should fail from litellm.proxy.proxy_server import track_cost_callback from litellm import ModelResponse, Choices, Message, Usage + import time + + request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{time.time()}" resp = ModelResponse( - id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac", + id=request_id, choices=[ Choices( finish_reason=None, @@ -733,6 +737,7 @@ def test_call_with_key_over_budget(prisma_client): ) await track_cost_callback( kwargs={ + "model": "chatgpt-v-2", "stream": False, "litellm_params": { "metadata": { @@ -747,6 +752,18 @@ def test_call_with_key_over_budget(prisma_client): end_time=datetime.now(), ) + # test spend_log was written and we can read it + spend_logs = await view_spend_logs(request_id=request_id) + + print("read spend logs", spend_logs) + assert len(spend_logs) == 1 + + spend_log = spend_logs[0] + + assert spend_log.request_id == request_id + assert 
spend_log.spend == float("2e-05") + assert spend_log.model == "chatgpt-v-2" + # use generated key to auth in result = await user_api_key_auth(request=request, api_key=bearer_token) print("result from user auth with new key", result) @@ -759,7 +776,8 @@ def test_call_with_key_over_budget(prisma_client): print(vars(e)) -def test_call_with_key_over_budget_stream(prisma_client): +@pytest.mark.asyncio() +async def test_call_with_key_over_budget_stream(prisma_client): # 14. Make a call with a key over budget, expect to fail setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") @@ -769,66 +787,69 @@ def test_call_with_key_over_budget_stream(prisma_client): litellm.set_verbose = True verbose_proxy_logger.setLevel(logging.DEBUG) try: + await litellm.proxy.proxy_server.prisma_client.connect() + request = GenerateKeyRequest(max_budget=0.00001) + key = await generate_key_fn(request) + print(key) - async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() - request = GenerateKeyRequest(max_budget=0.00001) - key = await generate_key_fn(request) - print(key) + generated_key = key.key + user_id = key.user_id + bearer_token = "Bearer " + generated_key - generated_key = key.key - user_id = key.user_id - bearer_token = "Bearer " + generated_key + request = Request(scope={"type": "http"}) + request._url = URL(url="/chat/completions") - request = Request(scope={"type": "http"}) - request._url = URL(url="/chat/completions") + # use generated key to auth in + result = await user_api_key_auth(request=request, api_key=bearer_token) + print("result from user auth with new key", result) - # use generated key to auth in - result = await user_api_key_auth(request=request, api_key=bearer_token) - print("result from user auth with new key", result) + # update spend using track_cost callback, make 2nd request, it should fail + from litellm.proxy.proxy_server import track_cost_callback + from litellm 
import ModelResponse, Choices, Message, Usage + import time - # update spend using track_cost callback, make 2nd request, it should fail - from litellm.proxy.proxy_server import track_cost_callback - from litellm import ModelResponse, Choices, Message, Usage - - resp = ModelResponse( - id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac", - choices=[ - Choices( - finish_reason=None, - index=0, - message=Message( - content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a", - role="assistant", - ), - ) - ], - model="gpt-35-turbo", # azure always has model written like this - usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410), - ) - await track_cost_callback( - kwargs={ - "stream": True, - "complete_streaming_response": resp, - "litellm_params": { - "metadata": { - "user_api_key": generated_key, - "user_api_key_user_id": user_id, - } - }, - "response_cost": 0.00002, + request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{time.time()}" + resp = ModelResponse( + id=request_id, + choices=[ + Choices( + finish_reason=None, + index=0, + message=Message( + content=" Sure! 
Here is a short poem about the sky:\n\nA canvas of blue, a", + role="assistant", + ), + ) + ], + model="gpt-35-turbo", # azure always has model written like this + usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410), + ) + await track_cost_callback( + kwargs={ + "call_type": "acompletion", + "model": "sagemaker-chatgpt-v-2", + "stream": True, + "complete_streaming_response": resp, + "litellm_params": { + "metadata": { + "user_api_key": generated_key, + "user_api_key_user_id": user_id, + } }, - completion_response=ModelResponse(), - start_time=datetime.now(), - end_time=datetime.now(), - ) + "response_cost": 0.00005, + }, + completion_response=resp, + start_time=datetime.now(), + end_time=datetime.now(), + ) - # use generated key to auth in - result = await user_api_key_auth(request=request, api_key=bearer_token) - print("result from user auth with new key", result) - pytest.fail(f"This should have failed!. They key crossed it's budget") + # use generated key to auth in + result = await user_api_key_auth(request=request, api_key=bearer_token) + print("result from user auth with new key", result) + pytest.fail(f"This should have failed!. 
The key crossed its budget") except Exception as e: + print("Got Exception", e) error_detail = e.message assert "Authentication Error, ExceededTokenBudget:" in error_detail print(vars(e)) diff --git a/tests/test_keys.py b/tests/test_keys.py index 917c50823..e9ee58a4d 100644 --- a/tests/test_keys.py +++ b/tests/test_keys.py @@ -219,9 +219,26 @@ async def test_key_info(): assert status == 403 + +async def get_spend_logs(session, request_id): + url = f"http://0.0.0.0:4000/spend/logs?request_id={request_id}" + headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"} + + async with session.get(url, headers=headers) as response: + status = response.status + response_text = await response.text() + + print(response_text) + print() + + if status != 200: + raise Exception(f"Request did not return a 200 status code: {status}") + return await response.json() + + @pytest.mark.asyncio async def test_key_info_spend_values(): """ + Test to ensure spend is correctly calculated. - create key - make completion call - assert cost is expected value
spend_logs = await get_spend_logs(session=session, request_id=response["id"]) + print(f"spend_logs: {spend_logs}") + usage = spend_logs[0]["usage"] + prompt_cost, completion_cost = litellm.cost_per_token( + model="gpt-35-turbo", + prompt_tokens=usage["prompt_tokens"], + completion_tokens=usage["completion_tokens"], + custom_llm_provider="azure", + ) + response_cost = prompt_cost + completion_cost + await asyncio.sleep(5) # allow db log to be updated + key_info = await get_key_info(session=session, get_key=key, call_key=key) + print( + f"response_cost: {response_cost}; key_info spend: {key_info['info']['spend']}" + ) + rounded_response_cost = round(response_cost, 8) + rounded_key_info_spend = round(key_info["info"]["spend"], 8) + assert rounded_response_cost == rounded_key_info_spend ## streaming key_gen = await generate_key(session=session, i=0) new_key = key_gen["key"] @@ -262,4 +288,6 @@ async def test_key_info_spend_values(): print( f"response_cost: {response_cost}; key_info spend: {key_info['info']['spend']}" ) - assert response_cost == key_info["info"]["spend"] + rounded_response_cost = round(response_cost, 8) + rounded_key_info_spend = round(key_info["info"]["spend"], 8) + assert rounded_response_cost == rounded_key_info_spend diff --git a/tests/test_spend_logs.py b/tests/test_spend_logs.py new file mode 100644 index 000000000..1907c4dae --- /dev/null +++ b/tests/test_spend_logs.py @@ -0,0 +1,84 @@ +# What this tests? +## Tests /spend endpoints. 
+ +import pytest +import asyncio +import aiohttp + + +async def generate_key(session, models=[]): + url = "http://0.0.0.0:4000/key/generate" + headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"} + data = { + "models": models, + "duration": None, + } + + async with session.post(url, headers=headers, json=data) as response: + status = response.status + response_text = await response.text() + + print(response_text) + print() + + if status != 200: + raise Exception(f"Request did not return a 200 status code: {status}") + return await response.json() + + +async def chat_completion(session, key): + url = "http://0.0.0.0:4000/chat/completions" + headers = { + "Authorization": f"Bearer {key}", + "Content-Type": "application/json", + } + data = { + "model": "gpt-3.5-turbo", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello!"}, + ], + } + + async with session.post(url, headers=headers, json=data) as response: + status = response.status + response_text = await response.text() + + print(response_text) + print() + + if status != 200: + raise Exception(f"Request did not return a 200 status code: {status}") + + return await response.json() + + +async def get_spend_logs(session, request_id): + url = f"http://0.0.0.0:4000/spend/logs?request_id={request_id}" + headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"} + + async with session.get(url, headers=headers) as response: + status = response.status + response_text = await response.text() + + print(response_text) + print() + + if status != 200: + raise Exception(f"Request did not return a 200 status code: {status}") + return await response.json() + + +@pytest.mark.asyncio +async def test_spend_logs(): + """ + - Create key + - Make call (makes sure it's in spend logs) + - Get request id from logs + """ + async with aiohttp.ClientSession() as session: + key_gen = await generate_key(session=session) + key = 
key_gen["key"] + response = await chat_completion(session=session, key=key) + await asyncio.sleep(5) + await get_spend_logs(session=session, request_id=response["id"])