Merge branch 'main' into litellm_spend_per_user

Ishaan Jaff 2024-01-24 12:24:15 -08:00, committed by GitHub
commit f76620b1d1
4 changed files with 204 additions and 67 deletions

View file

@@ -897,6 +897,10 @@ def get_logging_payload(kwargs, response_obj, start_time, end_time):
     from pydantic import Json
     import uuid
 
+    verbose_proxy_logger.debug(
+        f"SpendTable: get_logging_payload - kwargs: {kwargs}\n\n"
+    )
+
     if kwargs == None:
         kwargs = {}
     # standardize this function to be used across, s3, dynamoDB, langfuse logging
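Side note for running this locally: the new debug line is only emitted when the proxy logger is at DEBUG level, which the tests below already set. A minimal sketch using only names that appear in this commit (plus the stdlib logging import):

import logging

from litellm._logging import verbose_proxy_logger

# surfaces 'SpendTable: get_logging_payload - kwargs: ...' for each logged request
verbose_proxy_logger.setLevel(logging.DEBUG)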

View file

@@ -44,6 +44,7 @@ from litellm.proxy.proxy_server import (
     generate_key_fn,
     spend_user_fn,
     spend_key_fn,
+    view_spend_logs,
 )
 from litellm.proxy.utils import PrismaClient, ProxyLogging
 from litellm._logging import verbose_proxy_logger
@@ -715,9 +716,12 @@ def test_call_with_key_over_budget(prisma_client):
         # update spend using track_cost callback, make 2nd request, it should fail
         from litellm.proxy.proxy_server import track_cost_callback
         from litellm import ModelResponse, Choices, Message, Usage
+        import time
 
+        request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{time.time()}"
+
         resp = ModelResponse(
-            id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
+            id=request_id,
             choices=[
                 Choices(
                     finish_reason=None,
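Why the time suffix: the previous hard-coded id meant every run of this test wrote a spend-log row with the same request_id, so re-runs could collide with stale rows. A tiny sketch of the idea (the base string is just the old hard-coded id):

import time

# time.time() has sub-second precision, so each run gets a practically unique id
request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{time.time()}"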
@@ -733,6 +737,7 @@ def test_call_with_key_over_budget(prisma_client):
         )
         await track_cost_callback(
             kwargs={
+                "model": "chatgpt-v-2",
                 "stream": False,
                 "litellm_params": {
                     "metadata": {
@@ -747,6 +752,18 @@ def test_call_with_key_over_budget(prisma_client):
             end_time=datetime.now(),
         )
 
+        # test spend_log was written and we can read it
+        spend_logs = await view_spend_logs(request_id=request_id)
+
+        print("read spend logs", spend_logs)
+        assert len(spend_logs) == 1
+
+        spend_log = spend_logs[0]
+
+        assert spend_log.request_id == request_id
+        assert spend_log.spend == float("2e-05")
+        assert spend_log.model == "chatgpt-v-2"
+
         # use generated key to auth in
         result = await user_api_key_auth(request=request, api_key=bearer_token)
         print("result from user auth with new key", result)
@@ -759,7 +776,8 @@ def test_call_with_key_over_budget(prisma_client):
         print(vars(e))
 
 
-def test_call_with_key_over_budget_stream(prisma_client):
+@pytest.mark.asyncio()
+async def test_call_with_key_over_budget_stream(prisma_client):
     # 14. Make a call with a key over budget, expect to fail
     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
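This hunk, together with the large one below, moves the test from the old sync-wrapper pattern to a natively async test. Both shapes in miniature (assumes the pytest-asyncio plugin, which @pytest.mark.asyncio implies):

import asyncio

import pytest


# before: a sync test driving an inner coroutine by hand
def test_old_style():
    async def test():
        ...  # test body

    asyncio.run(test())


# after: pytest-asyncio runs the coroutine on the event loop directly
@pytest.mark.asyncio()
async def test_new_style():
    ...  # test body; await is usable at top level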
@@ -769,66 +787,69 @@ def test_call_with_key_over_budget_stream(prisma_client):
     litellm.set_verbose = True
     verbose_proxy_logger.setLevel(logging.DEBUG)
     try:
-
-        async def test():
-            await litellm.proxy.proxy_server.prisma_client.connect()
-            request = GenerateKeyRequest(max_budget=0.00001)
-            key = await generate_key_fn(request)
-            print(key)
-
-            generated_key = key.key
-            user_id = key.user_id
-            bearer_token = "Bearer " + generated_key
-
-            request = Request(scope={"type": "http"})
-            request._url = URL(url="/chat/completions")
-
-            # use generated key to auth in
-            result = await user_api_key_auth(request=request, api_key=bearer_token)
-            print("result from user auth with new key", result)
-
-            # update spend using track_cost callback, make 2nd request, it should fail
-            from litellm.proxy.proxy_server import track_cost_callback
-            from litellm import ModelResponse, Choices, Message, Usage
-
-            resp = ModelResponse(
-                id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
-                choices=[
-                    Choices(
-                        finish_reason=None,
-                        index=0,
-                        message=Message(
-                            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
-                            role="assistant",
-                        ),
-                    )
-                ],
-                model="gpt-35-turbo",  # azure always has model written like this
-                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
-            )
-            await track_cost_callback(
-                kwargs={
-                    "stream": True,
-                    "complete_streaming_response": resp,
-                    "litellm_params": {
-                        "metadata": {
-                            "user_api_key": generated_key,
-                            "user_api_key_user_id": user_id,
-                        }
-                    },
-                    "response_cost": 0.00002,
-                },
-                completion_response=ModelResponse(),
-                start_time=datetime.now(),
-                end_time=datetime.now(),
-            )
-
-            # use generated key to auth in
-            result = await user_api_key_auth(request=request, api_key=bearer_token)
-            print("result from user auth with new key", result)
-            pytest.fail(f"This should have failed!. They key crossed it's budget")
-
-        asyncio.run(test())
+        await litellm.proxy.proxy_server.prisma_client.connect()
+        request = GenerateKeyRequest(max_budget=0.00001)
+        key = await generate_key_fn(request)
+        print(key)
+
+        generated_key = key.key
+        user_id = key.user_id
+        bearer_token = "Bearer " + generated_key
+
+        request = Request(scope={"type": "http"})
+        request._url = URL(url="/chat/completions")
+
+        # use generated key to auth in
+        result = await user_api_key_auth(request=request, api_key=bearer_token)
+        print("result from user auth with new key", result)
+
+        # update spend using track_cost callback, make 2nd request, it should fail
+        from litellm.proxy.proxy_server import track_cost_callback
+        from litellm import ModelResponse, Choices, Message, Usage
+        import time
+
+        request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{time.time()}"
+        resp = ModelResponse(
+            id=request_id,
+            choices=[
+                Choices(
+                    finish_reason=None,
+                    index=0,
+                    message=Message(
+                        content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
+                        role="assistant",
+                    ),
+                )
+            ],
+            model="gpt-35-turbo",  # azure always has model written like this
+            usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
+        )
+        await track_cost_callback(
+            kwargs={
+                "call_type": "acompletion",
+                "model": "sagemaker-chatgpt-v-2",
+                "stream": True,
+                "complete_streaming_response": resp,
+                "litellm_params": {
+                    "metadata": {
+                        "user_api_key": generated_key,
+                        "user_api_key_user_id": user_id,
+                    }
+                },
+                "response_cost": 0.00005,
+            },
+            completion_response=resp,
+            start_time=datetime.now(),
+            end_time=datetime.now(),
+        )
+
+        # use generated key to auth in
+        result = await user_api_key_auth(request=request, api_key=bearer_token)
+        print("result from user auth with new key", result)
+        pytest.fail(f"This should have failed!. They key crossed it's budget")
     except Exception as e:
+        print("Got Exception", e)
         error_detail = e.message
         assert "Authentication Error, ExceededTokenBudget:" in error_detail
         print(vars(e))
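One detail this test leans on: a streamed call has no single final response object, so the cost callback is handed the aggregated stream under complete_streaming_response. A hedged sketch of the branching (response_for_costing is a hypothetical helper, not the proxy's real internals):

def response_for_costing(kwargs, completion_response):
    # streamed calls are costed from the response stitched together after the
    # last chunk; non-streamed calls are costed from the response itself
    if kwargs.get("stream") is True:
        return kwargs["complete_streaming_response"]
    return completion_response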

View file

@@ -219,9 +219,26 @@ async def test_key_info():
     assert status == 403
 
 
+async def get_spend_logs(session, request_id):
+    url = f"http://0.0.0.0:4000/spend/logs?request_id={request_id}"
+    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
+
+    async with session.get(url, headers=headers) as response:
+        status = response.status
+        response_text = await response.text()
+
+        print(response_text)
+        print()
+        if status != 200:
+            raise Exception(f"Request did not return a 200 status code: {status}")
+
+        return await response.json()
+
+
 @pytest.mark.asyncio
 async def test_key_info_spend_values():
     """
+    Test to ensure spend is correctly calculated.
     - create key
     - make completion call
     - assert cost is expected value
@@ -229,19 +246,28 @@ async def test_key_info_spend_values():
     async with aiohttp.ClientSession() as session:
         ## Test Spend Update ##
         # completion
-        # response = await chat_completion(session=session, key=key)
-        # prompt_cost, completion_cost = litellm.cost_per_token(
-        #     model="azure/gpt-35-turbo",
-        #     prompt_tokens=response["usage"]["prompt_tokens"],
-        #     completion_tokens=response["usage"]["completion_tokens"],
-        # )
-        # response_cost = prompt_cost + completion_cost
-        # await asyncio.sleep(5)  # allow db log to be updated
-        # key_info = await get_key_info(session=session, get_key=key, call_key=key)
-        # print(
-        #     f"response_cost: {response_cost}; key_info spend: {key_info['info']['spend']}"
-        # )
-        # assert response_cost == key_info["info"]["spend"]
+        key_gen = await generate_key(session=session, i=0)
+        key = key_gen["key"]
+        response = await chat_completion(session=session, key=key)
+        await asyncio.sleep(5)
+        spend_logs = await get_spend_logs(session=session, request_id=response["id"])
+        print(f"spend_logs: {spend_logs}")
+        usage = spend_logs[0]["usage"]
+        prompt_cost, completion_cost = litellm.cost_per_token(
+            model="gpt-35-turbo",
+            prompt_tokens=usage["prompt_tokens"],
+            completion_tokens=usage["completion_tokens"],
+            custom_llm_provider="azure",
+        )
+        response_cost = prompt_cost + completion_cost
+        await asyncio.sleep(5)  # allow db log to be updated
+        key_info = await get_key_info(session=session, get_key=key, call_key=key)
+        print(
+            f"response_cost: {response_cost}; key_info spend: {key_info['info']['spend']}"
+        )
+        rounded_response_cost = round(response_cost, 8)
+        rounded_key_info_spend = round(key_info["info"]["spend"], 8)
+        assert rounded_response_cost == rounded_key_info_spend
 
     ## streaming
     key_gen = await generate_key(session=session, i=0)
     new_key = key_gen["key"]
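For readers unfamiliar with the helper used above: litellm.cost_per_token returns a (prompt_cost, completion_cost) pair in USD. A standalone sketch; the 210/200 token counts are just the figures the budget tests in this commit use:

import litellm

prompt_cost, completion_cost = litellm.cost_per_token(
    model="gpt-35-turbo",
    prompt_tokens=210,
    completion_tokens=200,
    custom_llm_provider="azure",
)
print(prompt_cost + completion_cost)  # total cost for the request, in USD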
@@ -262,4 +288,6 @@ async def test_key_info_spend_values():
         print(
             f"response_cost: {response_cost}; key_info spend: {key_info['info']['spend']}"
         )
-        assert response_cost == key_info["info"]["spend"]
+        rounded_response_cost = round(response_cost, 8)
+        rounded_key_info_spend = round(key_info["info"]["spend"], 8)
+        assert rounded_response_cost == rounded_key_info_spend
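The move to rounded comparison is the standard guard against floating-point drift: the client recomputes the cost from token counts while the server accumulated it, so the two sums can disagree past the eighth decimal place. A classic illustration (values invented):

a = (0.1 + 0.2) + 0.3   # 0.6000000000000001; association order perturbs the last bits
b = 0.1 + (0.2 + 0.3)   # 0.6
assert a != b                        # raw equality is brittle
assert round(a, 8) == round(b, 8)    # rounding to 8 places makes it robust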

tests/test_spend_logs.py  Normal file  (+84)
View file

@@ -0,0 +1,84 @@
+# What this tests?
+## Tests /spend endpoints.
+import pytest
+import asyncio
+import aiohttp
+
+
+async def generate_key(session, models=[]):
+    url = "http://0.0.0.0:4000/key/generate"
+    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
+    data = {
+        "models": models,
+        "duration": None,
+    }
+
+    async with session.post(url, headers=headers, json=data) as response:
+        status = response.status
+        response_text = await response.text()
+
+        print(response_text)
+        print()
+
+        if status != 200:
+            raise Exception(f"Request did not return a 200 status code: {status}")
+
+        return await response.json()
+
+
+async def chat_completion(session, key):
+    url = "http://0.0.0.0:4000/chat/completions"
+    headers = {
+        "Authorization": f"Bearer {key}",
+        "Content-Type": "application/json",
+    }
+    data = {
+        "model": "gpt-3.5-turbo",
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "Hello!"},
+        ],
+    }
+
+    async with session.post(url, headers=headers, json=data) as response:
+        status = response.status
+        response_text = await response.text()
+
+        print(response_text)
+        print()
+
+        if status != 200:
+            raise Exception(f"Request did not return a 200 status code: {status}")
+
+        return await response.json()
+
+
+async def get_spend_logs(session, request_id):
+    url = f"http://0.0.0.0:4000/spend/logs?request_id={request_id}"
+    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
+
+    async with session.get(url, headers=headers) as response:
+        status = response.status
+        response_text = await response.text()
+
+        print(response_text)
+        print()
+
+        if status != 200:
+            raise Exception(f"Request did not return a 200 status code: {status}")
+
+        return await response.json()
+
+
+@pytest.mark.asyncio
+async def test_spend_logs():
+    """
+    - Create key
+    - Make call (makes sure it's in spend logs)
+    - Get request id from logs
+    """
+    async with aiohttp.ClientSession() as session:
+        key_gen = await generate_key(session=session)
+        key = key_gen["key"]
+        response = await chat_completion(session=session, key=key)
+        await asyncio.sleep(5)
+        await get_spend_logs(session=session, request_id=response["id"])
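Usage note: these end-to-end tests assume a proxy already listening on 0.0.0.0:4000 with master key sk-1234, exactly as the helpers above hard-code. A hedged sketch of hitting the same endpoint outside pytest, reusing this file's get_spend_logs (the "chatcmpl-..." placeholder stands in for the "id" field of a real completion response):

import asyncio

import aiohttp


async def main():
    async with aiohttp.ClientSession() as session:
        # request_id must come from a prior /chat/completions call
        logs = await get_spend_logs(session=session, request_id="chatcmpl-...")
        print(logs)


asyncio.run(main())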