Merge branch 'main' into litellm_spend_per_user

This commit is contained in:
Ishaan Jaff 2024-01-24 12:24:15 -08:00 committed by GitHub
commit f76620b1d1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 204 additions and 67 deletions

View file

@ -897,6 +897,10 @@ def get_logging_payload(kwargs, response_obj, start_time, end_time):
from pydantic import Json
import uuid
verbose_proxy_logger.debug(
f"SpendTable: get_logging_payload - kwargs: {kwargs}\n\n"
)
if kwargs == None:
kwargs = {}
# standardize this function to be used across, s3, dynamoDB, langfuse logging

View file

@ -44,6 +44,7 @@ from litellm.proxy.proxy_server import (
generate_key_fn,
spend_user_fn,
spend_key_fn,
view_spend_logs,
)
from litellm.proxy.utils import PrismaClient, ProxyLogging
from litellm._logging import verbose_proxy_logger
@ -715,9 +716,12 @@ def test_call_with_key_over_budget(prisma_client):
# update spend using track_cost callback, make 2nd request, it should fail
from litellm.proxy.proxy_server import track_cost_callback
from litellm import ModelResponse, Choices, Message, Usage
import time
request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{time.time()}"
resp = ModelResponse(
id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
id=request_id,
choices=[
Choices(
finish_reason=None,
@ -733,6 +737,7 @@ def test_call_with_key_over_budget(prisma_client):
)
await track_cost_callback(
kwargs={
"model": "chatgpt-v-2",
"stream": False,
"litellm_params": {
"metadata": {
@ -747,6 +752,18 @@ def test_call_with_key_over_budget(prisma_client):
end_time=datetime.now(),
)
# test spend_log was written and we can read it
spend_logs = await view_spend_logs(request_id=request_id)
print("read spend logs", spend_logs)
assert len(spend_logs) == 1
spend_log = spend_logs[0]
assert spend_log.request_id == request_id
assert spend_log.spend == float("2e-05")
assert spend_log.model == "chatgpt-v-2"
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token)
print("result from user auth with new key", result)
@ -759,7 +776,8 @@ def test_call_with_key_over_budget(prisma_client):
print(vars(e))
def test_call_with_key_over_budget_stream(prisma_client):
@pytest.mark.asyncio()
async def test_call_with_key_over_budget_stream(prisma_client):
# 14. Make a call with a key over budget, expect to fail
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
@ -769,8 +787,6 @@ def test_call_with_key_over_budget_stream(prisma_client):
litellm.set_verbose = True
verbose_proxy_logger.setLevel(logging.DEBUG)
try:
async def test():
await litellm.proxy.proxy_server.prisma_client.connect()
request = GenerateKeyRequest(max_budget=0.00001)
key = await generate_key_fn(request)
@ -790,9 +806,11 @@ def test_call_with_key_over_budget_stream(prisma_client):
# update spend using track_cost callback, make 2nd request, it should fail
from litellm.proxy.proxy_server import track_cost_callback
from litellm import ModelResponse, Choices, Message, Usage
import time
request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{time.time()}"
resp = ModelResponse(
id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
id=request_id,
choices=[
Choices(
finish_reason=None,
@ -808,6 +826,8 @@ def test_call_with_key_over_budget_stream(prisma_client):
)
await track_cost_callback(
kwargs={
"call_type": "acompletion",
"model": "sagemaker-chatgpt-v-2",
"stream": True,
"complete_streaming_response": resp,
"litellm_params": {
@ -816,9 +836,9 @@ def test_call_with_key_over_budget_stream(prisma_client):
"user_api_key_user_id": user_id,
}
},
"response_cost": 0.00002,
"response_cost": 0.00005,
},
completion_response=ModelResponse(),
completion_response=resp,
start_time=datetime.now(),
end_time=datetime.now(),
)
@ -829,6 +849,7 @@ def test_call_with_key_over_budget_stream(prisma_client):
pytest.fail(f"This should have failed!. They key crossed it's budget")
except Exception as e:
print("Got Exception", e)
error_detail = e.message
assert "Authentication Error, ExceededTokenBudget:" in error_detail
print(vars(e))

View file

@ -219,9 +219,26 @@ async def test_key_info():
assert status == 403
async def get_spend_logs(session, request_id):
    """Fetch spend log entries for one request from the proxy's /spend/logs endpoint.

    Args:
        session: Client session exposing an aiohttp-style
            ``get(url, headers=..., params=...)`` async context manager.
        request_id: Id of the request whose spend logs are wanted; sent as the
            ``request_id`` query parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If the response status is not 200.
    """
    url = "http://0.0.0.0:4000/spend/logs"
    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
    # Hand the id to the client as a query parameter so it gets URL-encoded;
    # splicing it into the URL string would break on ids containing '&', '#', etc.
    params = {"request_id": str(request_id)}

    async with session.get(url, headers=headers, params=params) as response:
        status = response.status
        response_text = await response.text()

        # Echo the raw body so a failing run shows what the server answered.
        print(response_text)
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")
        return await response.json()
@pytest.mark.asyncio
async def test_key_info_spend_values():
"""
Test to ensure spend is correctly calculated.
- create key
- make completion call
- assert cost is expected value
@ -229,19 +246,28 @@ async def test_key_info_spend_values():
async with aiohttp.ClientSession() as session:
## Test Spend Update ##
# completion
# response = await chat_completion(session=session, key=key)
# prompt_cost, completion_cost = litellm.cost_per_token(
# model="azure/gpt-35-turbo",
# prompt_tokens=response["usage"]["prompt_tokens"],
# completion_tokens=response["usage"]["completion_tokens"],
# )
# response_cost = prompt_cost + completion_cost
# await asyncio.sleep(5) # allow db log to be updated
# key_info = await get_key_info(session=session, get_key=key, call_key=key)
# print(
# f"response_cost: {response_cost}; key_info spend: {key_info['info']['spend']}"
# )
# assert response_cost == key_info["info"]["spend"]
key_gen = await generate_key(session=session, i=0)
key = key_gen["key"]
response = await chat_completion(session=session, key=key)
await asyncio.sleep(5)
spend_logs = await get_spend_logs(session=session, request_id=response["id"])
print(f"spend_logs: {spend_logs}")
usage = spend_logs[0]["usage"]
prompt_cost, completion_cost = litellm.cost_per_token(
model="gpt-35-turbo",
prompt_tokens=usage["prompt_tokens"],
completion_tokens=usage["completion_tokens"],
custom_llm_provider="azure",
)
response_cost = prompt_cost + completion_cost
await asyncio.sleep(5) # allow db log to be updated
key_info = await get_key_info(session=session, get_key=key, call_key=key)
print(
f"response_cost: {response_cost}; key_info spend: {key_info['info']['spend']}"
)
rounded_response_cost = round(response_cost, 8)
rounded_key_info_spend = round(key_info["info"]["spend"], 8)
assert rounded_response_cost == rounded_key_info_spend
## streaming
key_gen = await generate_key(session=session, i=0)
new_key = key_gen["key"]
@ -262,4 +288,6 @@ async def test_key_info_spend_values():
print(
f"response_cost: {response_cost}; key_info spend: {key_info['info']['spend']}"
)
assert response_cost == key_info["info"]["spend"]
rounded_response_cost = round(response_cost, 8)
rounded_key_info_spend = round(key_info["info"]["spend"], 8)
assert rounded_response_cost == rounded_key_info_spend

84
tests/test_spend_logs.py Normal file
View file

@ -0,0 +1,84 @@
# What does this test?
## Tests the /spend endpoints.
import pytest
import asyncio
import aiohttp
async def generate_key(session, models=None):
    """Create a new API key through the proxy's /key/generate endpoint.

    Args:
        session: Client session exposing an aiohttp-style
            ``post(url, headers=..., json=...)`` async context manager.
        models: Optional list of model names sent as the ``models`` field of
            the request body. Defaults to an empty list.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If the response status is not 200.
    """
    # Build a fresh list per call instead of sharing one mutable default
    # object across every invocation.
    if models is None:
        models = []

    url = "http://0.0.0.0:4000/key/generate"
    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
    data = {
        "models": models,
        "duration": None,
    }

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

        # Echo the raw body so a failing run shows what the server answered.
        print(response_text)
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")
        return await response.json()
async def chat_completion(session, key):
    """Send a fixed two-message chat request to /chat/completions using ``key``.

    The raw response body is printed. Any non-200 status raises ``Exception``;
    otherwise the decoded JSON body is returned.
    """
    endpoint = "http://0.0.0.0:4000/chat/completions"
    auth_headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }
    system_msg = {"role": "system", "content": "You are a helpful assistant."}
    user_msg = {"role": "user", "content": "Hello!"}
    payload = {"model": "gpt-3.5-turbo", "messages": [system_msg, user_msg]}

    async with session.post(endpoint, headers=auth_headers, json=payload) as resp:
        code = resp.status
        body = await resp.text()
        print(body)
        print()
        if code == 200:
            return await resp.json()
        raise Exception(f"Request did not return a 200 status code: {code}")
async def get_spend_logs(session, request_id):
    """Query the proxy's /spend/logs endpoint for a single request id.

    Args:
        session: Client session exposing an aiohttp-style
            ``get(url, headers=..., params=...)`` async context manager.
        request_id: Id of the request to look up; sent as the ``request_id``
            query parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If the response status is not 200.
    """
    url = "http://0.0.0.0:4000/spend/logs"
    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
    # Let the HTTP client build the query string so the id is URL-encoded;
    # interpolating it into the URL would corrupt ids with reserved characters.
    params = {"request_id": str(request_id)}

    async with session.get(url, headers=headers, params=params) as response:
        status = response.status
        response_text = await response.text()

        # Echo the raw body so a failing run shows what the server answered.
        print(response_text)
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")
        return await response.json()
@pytest.mark.asyncio
async def test_spend_logs():
    """
    End-to-end check of the spend log endpoint:
    - generate a key
    - make a chat completion call with that key
    - wait, then fetch the spend logs for that call's response id
    """
    async with aiohttp.ClientSession() as http:
        generated = await generate_key(session=http)
        completion = await chat_completion(session=http, key=generated["key"])
        # Pause before querying — presumably to give the proxy time to write
        # the spend log entry (TODO confirm the required delay).
        await asyncio.sleep(5)
        await get_spend_logs(session=http, request_id=completion["id"])