forked from phoenix/litellm-mirror
* fix(factory.py): ensure tool call converts image url Fixes https://github.com/BerriAI/litellm/issues/6953 * fix(transformation.py): support mp4 + pdf url's for vertex ai Fixes https://github.com/BerriAI/litellm/issues/6936 * fix(http_handler.py): mask gemini api key in error logs Fixes https://github.com/BerriAI/litellm/issues/6963 * docs(prometheus.md): update prometheus FAQs * feat(auth_checks.py): ensure specific model access > wildcard model access if wildcard model is in access group, but specific model is not - deny access * fix(auth_checks.py): handle auth checks for team based model access groups handles scenario where model access group used for wildcard models * fix(internal_user_endpoints.py): support adding guardrails on `/user/update` Fixes https://github.com/BerriAI/litellm/issues/6942 * fix(key_management_endpoints.py): fix prepare_metadata_fields helper * fix: fix tests * build(requirements.txt): bump openai dep version fixes proxies argument * test: fix tests * fix(http_handler.py): fix error message masking * fix(bedrock_guardrails.py): pass in prepped data * test: fix test * test: fix nvidia nim test * fix(http_handler.py): return original response headers * fix: revert maskedhttpstatuserror * test: update tests * test: cleanup test * fix(key_management_endpoints.py): fix metadata field update logic * fix(key_management_endpoints.py): maintain initial order of guardrails in key update * fix(key_management_endpoints.py): handle prepare metadata * fix: fix linting errors * fix: fix linting errors * fix: fix linting errors * fix: fix key management errors * fix(key_management_endpoints.py): update metadata * test: update test * refactor: add more debug statements * test: skip flaky test * test: fix test * fix: fix test * fix: fix update metadata logic * fix: fix test * ci(config.yml): change db url for e2e ui testing
3770 lines
130 KiB
Python
3770 lines
130 KiB
Python
# Test the following scenarios:
|
|
# 1. Generate a Key, and use it to make a call
|
|
# 2. Make a call with invalid key, expect it to fail
|
|
# 3. Make a call to a key with invalid model - expect to fail
|
|
# 4. Make a call to a key with valid model - expect to pass
|
|
# 5. Make a call with user over budget, expect to fail
|
|
# 6. Make a streaming chat/completions call with user over budget, expect to fail
|
|
# 7. Make a call with a key that never expires, expect to pass
|
|
# 8. Make a call with an expired key, expect to fail
|
|
# 9. Delete a Key
|
|
# 10. Generate a key, call key/info. Assert info returned is the same as generated key info
|
|
# 11. Generate a Key, call key/info, call key/update, call key/info
|
|
# 12. Make a call with key over budget, expect to fail
|
|
# 14. Make a streaming chat/completions call with key over budget, expect to fail
|
|
# 15. Generate key, when `allow_user_auth`=False - check if `/key/info` returns key_name=null
|
|
# 16. Generate key, when `allow_user_auth`=True - check if `/key/info` returns key_name=sk...<last-4-digits>
|
|
|
|
|
|
# function to call to generate key - async def new_user(data: NewUserRequest):
|
|
# function to validate a request - async def user_auth(request: Request):
|
|
|
|
import os
|
|
import sys
|
|
import traceback
|
|
import uuid
|
|
from datetime import datetime, timezone
|
|
|
|
from dotenv import load_dotenv
|
|
from fastapi import Request
|
|
from fastapi.routing import APIRoute
|
|
import httpx
|
|
|
|
load_dotenv()
|
|
import io
|
|
import os
|
|
import time
|
|
|
|
# this file is to test litellm/proxy
|
|
|
|
sys.path.insert(
|
|
0, os.path.abspath("../..")
|
|
) # Adds the parent directory to the system path
|
|
import asyncio
|
|
import logging
|
|
|
|
import pytest
|
|
|
|
import litellm
|
|
from litellm._logging import verbose_proxy_logger
|
|
from litellm.proxy.management_endpoints.internal_user_endpoints import (
|
|
new_user,
|
|
user_info,
|
|
user_update,
|
|
)
|
|
from litellm.proxy.auth.auth_checks import get_key_object
|
|
from litellm.proxy.management_endpoints.key_management_endpoints import (
|
|
delete_key_fn,
|
|
generate_key_fn,
|
|
generate_key_helper_fn,
|
|
info_key_fn,
|
|
list_keys,
|
|
regenerate_key_fn,
|
|
update_key_fn,
|
|
)
|
|
from litellm.proxy.management_endpoints.team_endpoints import (
|
|
new_team,
|
|
team_info,
|
|
update_team,
|
|
)
|
|
from litellm.proxy.proxy_server import (
|
|
LitellmUserRoles,
|
|
audio_transcriptions,
|
|
chat_completion,
|
|
completion,
|
|
embeddings,
|
|
image_generation,
|
|
model_list,
|
|
moderations,
|
|
user_api_key_auth,
|
|
)
|
|
from litellm.proxy.management_endpoints.customer_endpoints import (
|
|
new_end_user,
|
|
)
|
|
from litellm.proxy.spend_tracking.spend_management_endpoints import (
|
|
global_spend,
|
|
spend_key_fn,
|
|
spend_user_fn,
|
|
view_spend_logs,
|
|
)
|
|
from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend
|
|
|
|
verbose_proxy_logger.setLevel(level=logging.DEBUG)
|
|
|
|
from starlette.datastructures import URL
|
|
|
|
from litellm.caching.caching import DualCache
|
|
from litellm.proxy._types import (
|
|
DynamoDBArgs,
|
|
GenerateKeyRequest,
|
|
KeyRequest,
|
|
LiteLLM_UpperboundKeyGenerateParams,
|
|
NewCustomerRequest,
|
|
NewTeamRequest,
|
|
NewUserRequest,
|
|
ProxyErrorTypes,
|
|
ProxyException,
|
|
UpdateKeyRequest,
|
|
UpdateTeamRequest,
|
|
UpdateUserRequest,
|
|
UserAPIKeyAuth,
|
|
)
|
|
|
|
# Module-wide ProxyLogging instance; the prisma_client fixture passes it to
# PrismaClient and the end-user budget test passes it to update_spend().
proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())


# Sample chat-completion payload.
request_data = dict(
    model="azure-gpt-3.5",
    messages=[
        dict(role="user", content="this is my new test. respond in 50 lines"),
    ],
)
|
|
|
|
|
|
@pytest.fixture
def prisma_client():
    """Return a PrismaClient built from DATABASE_URL with pool settings applied.

    Side effects visible in this fixture:
      * DATABASE_URL in the environment is rewritten with ``connection_limit``
        and ``pool_timeout`` query parameters.
      * ``litellm.proxy.proxy_server.litellm_proxy_budget_name`` is set to a
        fresh, time-stamped name.
      * ``litellm.proxy.proxy_server.user_custom_key_generate`` is set to None.
    """
    from litellm.proxy.proxy_cli import append_query_params

    # Connection pool size + pool timeout are passed as URL query parameters.
    pool_settings = {"connection_limit": 100, "pool_timeout": 60}
    os.environ["DATABASE_URL"] = append_query_params(
        os.getenv("DATABASE_URL"), pool_settings
    )

    client = PrismaClient(
        database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj
    )

    # Give each test its own proxy budget name and clear any custom key generator.
    proxy_server = litellm.proxy.proxy_server
    proxy_server.litellm_proxy_budget_name = f"litellm-proxy-budget-{time.time()}"
    proxy_server.user_custom_key_generate = None

    return client
|
|
|
|
|
|
@pytest.mark.asyncio()
@pytest.mark.flaky(retries=6, delay=1)
async def test_new_user_response(prisma_client):
    """Create a team, then a user in that team, and check the /user/new response.

    The response must echo back the requested ``models``, ``team_id`` and
    ``tpm_limit``. Any exception (including a failed assertion) is reported
    through ``pytest.fail``.
    """
    try:
        print("prisma client=", prisma_client)

        setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
        setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")

        await litellm.proxy.proxy_server.prisma_client.connect()

        # Unique team id so repeated (flaky-retried) runs do not collide.
        _team_id = "ishaan-special-team_{}".format(uuid.uuid4())
        await new_team(
            NewTeamRequest(
                team_id=_team_id,
            ),
            http_request=Request(scope={"type": "http"}),
            user_api_key_dict=UserAPIKeyAuth(
                user_role=LitellmUserRoles.PROXY_ADMIN,
                api_key="sk-1234",
                user_id="1234",
            ),
        )

        # The admin auth object is an argument of new_user(), matching every
        # other new_user() call in this file. It used to be passed as a field
        # of NewUserRequest, which meant new_user() never received it.
        _response = await new_user(
            data=NewUserRequest(
                models=["azure-gpt-3.5"],
                team_id=_team_id,
                tpm_limit=20,
            ),
            user_api_key_dict=UserAPIKeyAuth(
                user_role=LitellmUserRoles.PROXY_ADMIN,
                api_key="sk-1234",
                user_id="1234",
            ),
        )
        print(_response)
        assert _response.models == ["azure-gpt-3.5"]
        assert _response.team_id == _team_id
        assert _response.tpm_limit == 20

    except Exception as e:
        print("Got Exception", e)
        pytest.fail(f"Got exception {e}")
|
|
|
|
|
|
@pytest.mark.parametrize(
    "api_route",
    [
        APIRoute(path=route_path, endpoint=route_endpoint)
        for route_path, route_endpoint in (
            # chat_completion
            ("/engines/{model}/chat/completions", chat_completion),
            ("/openai/deployments/{model}/chat/completions", chat_completion),
            ("/chat/completions", chat_completion),
            ("/v1/chat/completions", chat_completion),
            # completion
            ("/completions", completion),
            ("/v1/completions", completion),
            ("/engines/{model}/completions", completion),
            ("/openai/deployments/{model}/completions", completion),
            # embeddings
            ("/v1/embeddings", embeddings),
            ("/embeddings", embeddings),
            ("/openai/deployments/{model}/embeddings", embeddings),
            # image generation
            ("/v1/images/generations", image_generation),
            ("/images/generations", image_generation),
            # audio transcriptions
            ("/v1/audio/transcriptions", audio_transcriptions),
            ("/audio/transcriptions", audio_transcriptions),
            # moderations
            ("/v1/moderations", moderations),
            ("/moderations", moderations),
            # model_list
            ("/v1/models", model_list),
            ("/models", model_list),
            # threads
            ("/v1/threads/thread_49EIN5QF32s4mH20M7GFKdlZ", model_list),
        )
    ],
    ids=lambda route: str({"route": route.endpoint.__name__, "path": route.path}),
)
def test_generate_and_call_with_valid_key(prisma_client, api_route):
    """Scenario 1: generate a key and authenticate with it on each route.

    For every parametrized route the test creates an internal user, checks
    /user/info reports the same role and user id, checks the new key is not
    yet in the in-memory key cache, and then authenticates a request for that
    route with the key. Any exception is turned into a test failure.
    """
    print("prisma client=", prisma_client)

    proxy_server = litellm.proxy.proxy_server
    proxy_server.prisma_client = prisma_client
    proxy_server.master_key = "sk-1234"
    try:

        async def _scenario():
            await litellm.proxy.proxy_server.prisma_client.connect()
            from litellm.proxy.proxy_server import user_api_key_cache

            admin_auth = UserAPIKeyAuth(
                user_role=LitellmUserRoles.PROXY_ADMIN,
                api_key="sk-1234",
                user_id="1234",
            )
            created = await new_user(
                NewUserRequest(user_role=LitellmUserRoles.INTERNAL_USER),
                user_api_key_dict=admin_auth,
            )
            print(created)
            created_user_id = created.user_id

            # /user/info must report the role and id we just created.
            info_response = await user_info(
                user_id=created_user_id, user_api_key_dict=admin_auth
            )
            stored_user = info_response.user_info
            print("new_user_info=", stored_user)
            assert stored_user["user_role"] == LitellmUserRoles.INTERNAL_USER
            assert stored_user["user_id"] == created_user_id

            api_key = created.key
            auth_header = "Bearer " + api_key

            # The raw key must not already sit in the in-memory key cache.
            assert api_key not in user_api_key_cache.in_memory_cache.cache_dict

            db_row = await prisma_client.get_data(token=api_key)
            print("token from prisma", db_row)

            http_request = Request(
                {
                    "type": "http",
                    "route": api_route,
                    "path": api_route.path,
                    "headers": [("Authorization", auth_header)],
                }
            )

            # Authenticate with the freshly generated key.
            auth_result = await user_api_key_auth(
                request=http_request, api_key=auth_header
            )
            print("result from user auth with new key", auth_result)

        asyncio.run(_scenario())
    except Exception as e:
        pytest.fail(f"An exception occurred - {e!s}")
|
|
|
|
|
|
def test_call_with_invalid_key(prisma_client):
    """Scenario 2: authenticating with an unknown key must raise.

    The raised exception's ``message`` has to mention
    "Authentication Error, Invalid proxy server token passed".
    """
    proxy_server = litellm.proxy.proxy_server
    proxy_server.prisma_client = prisma_client
    proxy_server.master_key = "sk-1234"
    try:

        async def _scenario():
            await litellm.proxy.proxy_server.prisma_client.connect()
            auth_header = "Bearer " + "sk-126666"

            http_request = Request(scope={"type": "http"}, receive=None)
            http_request._url = URL(url="/chat/completions")

            # This key was never generated, so auth is expected to raise.
            auth_result = await user_api_key_auth(
                request=http_request, api_key=auth_header
            )
            print("got result", auth_result)
            pytest.fail("This should have failed!. IT's an invalid key")

        asyncio.run(_scenario())
    except Exception as e:
        print("Got Exception", e)
        message = e.message
        print(message)
        assert "Authentication Error, Invalid proxy server token passed" in message
|
|
|
|
|
|
def test_call_with_invalid_model(prisma_client):
    """Scenario 3: a key restricted to ``mistral`` must not reach another model.

    The request body asks for ``gemini-pro-vision``; auth is expected to raise
    with the exact "API Key not allowed to access model" message asserted below.
    """
    litellm.set_verbose = True
    proxy_server = litellm.proxy.proxy_server
    proxy_server.prisma_client = prisma_client
    proxy_server.master_key = "sk-1234"
    try:

        async def _scenario():
            await litellm.proxy.proxy_server.prisma_client.connect()
            created = await new_user(
                data=NewUserRequest(models=["mistral"]),
                user_api_key_dict=UserAPIKeyAuth(
                    user_role=LitellmUserRoles.PROXY_ADMIN,
                    api_key="sk-1234",
                    user_id="1234",
                ),
            )
            print(created)

            auth_header = "Bearer " + created.key

            http_request = Request(scope={"type": "http"})
            http_request._url = URL(url="/chat/completions")

            async def _body():
                # Request a model the key is not allowed to use.
                return b'{"model": "gemini-pro-vision"}'

            http_request.body = _body

            print(f"Bearer token being sent to user_api_key_auth() - {auth_header}")
            await user_api_key_auth(request=http_request, api_key=auth_header)
            pytest.fail("This should have failed!. IT's an invalid model")

        asyncio.run(_scenario())
    except Exception as e:
        expected_message = "Authentication Error, API Key not allowed to access model. This token can only access models=['mistral']. Tried to access gemini-pro-vision"
        assert e.message == expected_message
|
|
|
|
|
|
def test_call_with_valid_model(prisma_client):
    """Scenario 4: a key restricted to ``mistral`` may call ``mistral``.

    Any exception raised while creating the user or authenticating is turned
    into a test failure.
    """
    proxy_server = litellm.proxy.proxy_server
    proxy_server.prisma_client = prisma_client
    proxy_server.master_key = "sk-1234"
    try:

        async def _scenario():
            await litellm.proxy.proxy_server.prisma_client.connect()
            created = await new_user(
                NewUserRequest(models=["mistral"]),
                user_api_key_dict=UserAPIKeyAuth(
                    user_role=LitellmUserRoles.PROXY_ADMIN,
                    api_key="sk-1234",
                    user_id="1234",
                ),
            )
            print(created)

            auth_header = "Bearer " + created.key

            http_request = Request(scope={"type": "http"})
            http_request._url = URL(url="/chat/completions")

            async def _body():
                # Request the one model the key is allowed to use.
                return b'{"model": "mistral"}'

            http_request.body = _body

            auth_result = await user_api_key_auth(
                request=http_request, api_key=auth_header
            )
            print("result from user auth with new key", auth_result)

        asyncio.run(_scenario())
    except Exception as e:
        pytest.fail(f"An exception occurred - {e!s}")
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_call_with_valid_model_using_all_models(prisma_client):
    """
    Do not delete - this covers the Admin UI flow:

    1. Create a team with models = [`all-proxy-models`]
    2. Create a key in that team with models = [`all-team-models`]
    3. Authenticate a /chat/completions request with the key -> expect to pass
    4. /key/info for the key must still report models == [`all-team-models`]
    """
    proxy_server = litellm.proxy.proxy_server
    proxy_server.prisma_client = prisma_client
    proxy_server.master_key = "sk-1234"
    try:
        await litellm.proxy.proxy_server.prisma_client.connect()

        team_response = await new_team(
            data=NewTeamRequest(
                team_alias="testing-team",
                models=["all-proxy-models"],
            ),
            user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
            http_request=Request(scope={"type": "http"}),
        )
        print("new_team_response", team_response)
        team_id = team_response["team_id"]

        generated = await generate_key_fn(
            data=GenerateKeyRequest(models=["all-team-models"], team_id=team_id)
        )
        print(generated)

        api_key = generated.key
        auth_header = "Bearer " + api_key

        http_request = Request(scope={"type": "http"})
        http_request._url = URL(url="/chat/completions")

        async def _body():
            return b'{"model": "mistral"}'

        http_request.body = _body

        # Authenticate with the generated key.
        auth_result = await user_api_key_auth(request=http_request, api_key=auth_header)
        print("result from user auth with new key", auth_result)

        # /key/info must report the alias stored on the key itself.
        key_info = await info_key_fn(
            key=api_key,
            user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
        )
        print("key_info", key_info)
        assert key_info["info"]["models"] == ["all-team-models"]

    except Exception as e:
        pytest.fail(f"An exception occurred - {e!s}")
|
|
|
|
|
|
def test_call_with_user_over_budget(prisma_client):
    """Scenario 5: a user over budget is rejected on the next call.

    A user is created with ``max_budget=0.00001``. The first auth call runs
    before any spend is recorded. A response cost of 0.00002 is then reported
    through the proxy's track-cost callback and, after a 5 second wait, the
    second auth call must raise a ``ProxyException`` of type
    ``budget_exceeded`` whose message contains "ExceededBudget:".
    """
    proxy_server = litellm.proxy.proxy_server
    proxy_server.prisma_client = prisma_client
    proxy_server.master_key = "sk-1234"
    try:

        async def _scenario():
            await litellm.proxy.proxy_server.prisma_client.connect()
            created = await new_user(
                data=NewUserRequest(max_budget=0.00001),
                user_api_key_dict=UserAPIKeyAuth(
                    user_role=LitellmUserRoles.PROXY_ADMIN,
                    api_key="sk-1234",
                    user_id="1234",
                ),
            )
            print(created)

            api_key = created.key
            created_user_id = created.user_id
            auth_header = "Bearer " + api_key

            http_request = Request(scope={"type": "http"})
            http_request._url = URL(url="/chat/completions")

            # First call: no spend recorded yet.
            auth_result = await user_api_key_auth(
                request=http_request, api_key=auth_header
            )
            print("result from user auth with new key", auth_result)

            # Record spend via the track-cost callback; the 2nd request should fail.
            from litellm import Choices, Message, ModelResponse, Usage
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )

            assistant_message = Message(
                content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                role="assistant",
            )
            resp = ModelResponse(
                id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
                choices=[
                    Choices(finish_reason=None, index=0, message=assistant_message)
                ],
                model="gpt-35-turbo",  # azure always has model written like this
                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
            )
            callback_kwargs = {
                "stream": False,
                "litellm_params": {
                    "metadata": {
                        "user_api_key": api_key,
                        "user_api_key_user_id": created_user_id,
                    }
                },
                "response_cost": 0.00002,
            }
            await track_cost_callback(
                kwargs=callback_kwargs,
                completion_response=resp,
                start_time=datetime.now(),
                end_time=datetime.now(),
            )
            await asyncio.sleep(5)

            # Second call: expected to raise.
            auth_result = await user_api_key_auth(
                request=http_request, api_key=auth_header
            )
            print("result from user auth with new key", auth_result)
            pytest.fail("This should have failed!. They key crossed it's budget")

        asyncio.run(_scenario())
    except Exception as e:
        print("got an errror=", e)
        assert "ExceededBudget:" in e.message
        assert isinstance(e, ProxyException)
        assert e.type == ProxyErrorTypes.budget_exceeded
        print(vars(e))
|
|
|
|
|
|
def test_end_user_cache_write_unit_test():
    """Placeholder: assert end user object is being written to cache as expected.

    The test currently has no body and always passes.
    """
    return None
|
|
|
|
|
|
def test_call_with_end_user_over_budget(prisma_client):
    """An end user passed in the request body is rejected once over budget.

    The check is only exercised with ``litellm.max_end_user_budget`` set, so
    the test sets it to 0.00001 and restores the previous value on exit so the
    limit does not leak into other tests.

    Flow: create a customer with ``max_budget=0.000001``, authenticate once
    with the master key, report a response cost of 10 for that end user via
    the track-cost callback, wait 10 seconds, flush spend with
    ``update_spend`` and authenticate again. The second call must raise a
    ``ProxyException`` of type ``budget_exceeded`` whose message contains
    "Budget has been exceeded! Current".
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")

    # Remember the current global (or its absence) so it can be put back.
    _unset = object()
    previous_end_user_budget = getattr(litellm, "max_end_user_budget", _unset)
    setattr(litellm, "max_end_user_budget", 0.00001)
    try:

        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            user = f"ishaan {uuid.uuid4().hex}"
            # Customer (end user) with a budget far below the cost reported below.
            request = NewCustomerRequest(user_id=user, max_budget=0.000001)
            await new_end_user(
                request,
                user_api_key_dict=UserAPIKeyAuth(
                    user_role=LitellmUserRoles.PROXY_ADMIN,
                    api_key="sk-1234",
                    user_id="1234",
                ),
            )

            request = Request(scope={"type": "http"})
            request._url = URL(url="/chat/completions")
            bearer_token = "Bearer sk-1234"

            # First auth call happens before a request body is attached.
            result = await user_api_key_auth(request=request, api_key=bearer_token)

            async def return_body():
                return_string = f'{{"model": "gemini-pro-vision", "user": "{user}"}}'
                # return string as bytes
                return return_string.encode()

            request.body = return_body

            # update spend using track_cost callback, make 2nd request, it should fail
            from litellm import Choices, Message, ModelResponse, Usage
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )

            resp = ModelResponse(
                id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
                choices=[
                    Choices(
                        finish_reason=None,
                        index=0,
                        message=Message(
                            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                            role="assistant",
                        ),
                    )
                ],
                model="gpt-35-turbo",  # azure always has model written like this
                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
            )
            await track_cost_callback(
                kwargs={
                    "stream": False,
                    "litellm_params": {
                        "metadata": {
                            "user_api_key": "sk-1234",
                            "user_api_key_user_id": user,
                        },
                        "proxy_server_request": {
                            "body": {
                                "user": user,
                            }
                        },
                    },
                    "response_cost": 10,
                },
                completion_response=resp,
                start_time=datetime.now(),
                end_time=datetime.now(),
            )

            await asyncio.sleep(10)
            await update_spend(
                prisma_client=prisma_client,
                db_writer_client=None,
                proxy_logging_obj=proxy_logging_obj,
            )

            # Second auth call, now with the end user in the body: should raise.
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
            pytest.fail("This should have failed!. They key crossed it's budget")

        asyncio.run(test())
    except Exception as e:
        error_detail = e.message
        assert "Budget has been exceeded! Current" in error_detail
        assert isinstance(e, ProxyException)
        assert e.type == ProxyErrorTypes.budget_exceeded
        print(vars(e))
    finally:
        # Put the global back exactly as it was found.
        if previous_end_user_budget is _unset:
            delattr(litellm, "max_end_user_budget")
        else:
            setattr(litellm, "max_end_user_budget", previous_end_user_budget)
|
|
|
|
|
|
def test_call_with_proxy_over_budget(prisma_client):
    """Scenario 5.1: a call is rejected once the proxy itself is over budget.

    The test points ``litellm_proxy_admin_name`` at a fresh time-stamped name,
    sets ``litellm.max_budget`` to 0.00001 and seeds that name's ``:spend``
    cache entry with 0. A cost of 0.00002 is then reported through the
    track-cost callback and, after 5 seconds, the second auth call must raise
    a ``ProxyException`` of type ``budget_exceeded`` whose message contains
    "Budget has been exceeded".

    ``litellm.max_budget`` is restored on exit so the tiny limit does not leak
    into other tests.
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    litellm_proxy_budget_name = f"litellm-proxy-budget-{time.time()}"
    setattr(
        litellm.proxy.proxy_server,
        "litellm_proxy_admin_name",
        litellm_proxy_budget_name,
    )

    # Remember the current global (or its absence) so it can be put back.
    _unset = object()
    previous_max_budget = getattr(litellm, "max_budget", _unset)
    setattr(litellm, "max_budget", 0.00001)
    try:
        from litellm.proxy.proxy_server import user_api_key_cache

        # Start the proxy-wide spend counter for this run at zero.
        user_api_key_cache.set_cache(
            key="{}:spend".format(litellm_proxy_budget_name), value=0
        )
        setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
        try:

            async def test():
                await litellm.proxy.proxy_server.prisma_client.connect()
                request = NewUserRequest()
                key = await new_user(
                    data=request,
                    user_api_key_dict=UserAPIKeyAuth(
                        user_role=LitellmUserRoles.PROXY_ADMIN,
                        api_key="sk-1234",
                        user_id="1234",
                    ),
                )
                print(key)

                generated_key = key.key
                user_id = key.user_id
                bearer_token = "Bearer " + generated_key

                request = Request(scope={"type": "http"})
                request._url = URL(url="/chat/completions")

                # use generated key to auth in
                result = await user_api_key_auth(request=request, api_key=bearer_token)
                print("result from user auth with new key", result)

                # update spend using track_cost callback, make 2nd request, it should fail
                from litellm import Choices, Message, ModelResponse, Usage
                from litellm.proxy.proxy_server import (
                    _PROXY_track_cost_callback as track_cost_callback,
                )

                resp = ModelResponse(
                    id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
                    choices=[
                        Choices(
                            finish_reason=None,
                            index=0,
                            message=Message(
                                content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                                role="assistant",
                            ),
                        )
                    ],
                    model="gpt-35-turbo",  # azure always has model written like this
                    usage=Usage(
                        prompt_tokens=210, completion_tokens=200, total_tokens=410
                    ),
                )
                await track_cost_callback(
                    kwargs={
                        "stream": False,
                        "litellm_params": {
                            "metadata": {
                                "user_api_key": generated_key,
                                "user_api_key_user_id": user_id,
                            }
                        },
                        "response_cost": 0.00002,
                    },
                    completion_response=resp,
                    start_time=datetime.now(),
                    end_time=datetime.now(),
                )

                await asyncio.sleep(5)
                # use generated key to auth in
                result = await user_api_key_auth(request=request, api_key=bearer_token)
                print("result from user auth with new key", result)
                pytest.fail("This should have failed!. They key crossed it's budget")

            asyncio.run(test())
        except Exception as e:
            # Exceptions without `.message` fall back to the formatted traceback
            # so the assertion below reports what actually happened.
            if hasattr(e, "message"):
                error_detail = e.message
            else:
                error_detail = traceback.format_exc()
            assert "Budget has been exceeded" in error_detail
            assert isinstance(e, ProxyException)
            assert e.type == ProxyErrorTypes.budget_exceeded
            print(vars(e))
    finally:
        # Put the global back exactly as it was found.
        if previous_max_budget is _unset:
            delattr(litellm, "max_budget")
        else:
            setattr(litellm, "max_budget", previous_max_budget)
|
|
|
|
|
|
def test_call_with_user_over_budget_stream(prisma_client):
    """Scenario 6: streaming variant of the user-over-budget check.

    Same flow as the non-streaming test, except the cost is reported with
    ``"stream": True`` and the full response passed as
    ``complete_streaming_response`` (the callback's ``completion_response``
    is an empty ``ModelResponse``). The second auth call must raise a
    ``ProxyException`` of type ``budget_exceeded`` whose message contains
    "ExceededBudget:".
    """
    proxy_server = litellm.proxy.proxy_server
    proxy_server.prisma_client = prisma_client
    proxy_server.master_key = "sk-1234"
    import logging

    from litellm._logging import verbose_proxy_logger

    litellm.set_verbose = True
    verbose_proxy_logger.setLevel(logging.DEBUG)
    try:

        async def _scenario():
            await litellm.proxy.proxy_server.prisma_client.connect()
            created = await new_user(
                data=NewUserRequest(max_budget=0.00001),
                user_api_key_dict=UserAPIKeyAuth(
                    user_role=LitellmUserRoles.PROXY_ADMIN,
                    api_key="sk-1234",
                    user_id="1234",
                ),
            )
            print(created)

            api_key = created.key
            created_user_id = created.user_id
            auth_header = "Bearer " + api_key

            http_request = Request(scope={"type": "http"})
            http_request._url = URL(url="/chat/completions")

            # First call: no spend recorded yet.
            auth_result = await user_api_key_auth(
                request=http_request, api_key=auth_header
            )
            print("result from user auth with new key", auth_result)

            # Record spend via the track-cost callback; the 2nd request should fail.
            from litellm import Choices, Message, ModelResponse, Usage
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )

            assistant_message = Message(
                content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                role="assistant",
            )
            resp = ModelResponse(
                id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
                choices=[
                    Choices(finish_reason=None, index=0, message=assistant_message)
                ],
                model="gpt-35-turbo",  # azure always has model written like this
                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
            )
            callback_kwargs = {
                "stream": True,
                "complete_streaming_response": resp,
                "litellm_params": {
                    "metadata": {
                        "user_api_key": api_key,
                        "user_api_key_user_id": created_user_id,
                    }
                },
                "response_cost": 0.00002,
            }
            await track_cost_callback(
                kwargs=callback_kwargs,
                completion_response=ModelResponse(),
                start_time=datetime.now(),
                end_time=datetime.now(),
            )
            await asyncio.sleep(5)

            # Second call: expected to raise.
            auth_result = await user_api_key_auth(
                request=http_request, api_key=auth_header
            )
            print("result from user auth with new key", auth_result)
            pytest.fail("This should have failed!. They key crossed it's budget")

        asyncio.run(_scenario())
    except Exception as e:
        assert "ExceededBudget:" in e.message
        assert isinstance(e, ProxyException)
        assert e.type == ProxyErrorTypes.budget_exceeded
        print(vars(e))
|
|
|
|
|
|
def test_call_with_proxy_over_budget_stream(prisma_client):
    """Scenario 6.1: streaming variant of the proxy-over-budget check.

    The test points ``litellm_proxy_admin_name`` at a fresh time-stamped name,
    sets ``litellm.max_budget`` to 0.00001 and seeds that name's ``:spend``
    cache entry with 0. A cost of 0.00002 is reported with ``"stream": True``
    and the full response passed as ``complete_streaming_response``. After 5
    seconds the second auth call must raise with "Budget has been exceeded"
    in the exception's ``message``.

    ``litellm.max_budget`` is restored on exit so the tiny limit does not leak
    into other tests.
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    litellm_proxy_budget_name = f"litellm-proxy-budget-{time.time()}"
    setattr(
        litellm.proxy.proxy_server,
        "litellm_proxy_admin_name",
        litellm_proxy_budget_name,
    )

    # Remember the current global (or its absence) so it can be put back.
    _unset = object()
    previous_max_budget = getattr(litellm, "max_budget", _unset)
    setattr(litellm, "max_budget", 0.00001)
    try:
        from litellm.proxy.proxy_server import user_api_key_cache

        # Start the proxy-wide spend counter for this run at zero.
        user_api_key_cache.set_cache(
            key="{}:spend".format(litellm_proxy_budget_name), value=0
        )
        setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)

        # `logging` and `verbose_proxy_logger` come from the module-level imports.
        litellm.set_verbose = True
        verbose_proxy_logger.setLevel(logging.DEBUG)
        try:

            async def test():
                await litellm.proxy.proxy_server.prisma_client.connect()
                # The user carries no budget of its own; only the proxy-wide
                # budget set above applies.
                request = NewUserRequest()
                key = await new_user(
                    data=request,
                    user_api_key_dict=UserAPIKeyAuth(
                        user_role=LitellmUserRoles.PROXY_ADMIN,
                        api_key="sk-1234",
                        user_id="1234",
                    ),
                )
                print(key)

                generated_key = key.key
                user_id = key.user_id
                bearer_token = "Bearer " + generated_key

                request = Request(scope={"type": "http"})
                request._url = URL(url="/chat/completions")

                # use generated key to auth in
                result = await user_api_key_auth(request=request, api_key=bearer_token)
                print("result from user auth with new key", result)

                # update spend using track_cost callback, make 2nd request, it should fail
                from litellm import Choices, Message, ModelResponse, Usage
                from litellm.proxy.proxy_server import (
                    _PROXY_track_cost_callback as track_cost_callback,
                )

                resp = ModelResponse(
                    id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
                    choices=[
                        Choices(
                            finish_reason=None,
                            index=0,
                            message=Message(
                                content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                                role="assistant",
                            ),
                        )
                    ],
                    model="gpt-35-turbo",  # azure always has model written like this
                    usage=Usage(
                        prompt_tokens=210, completion_tokens=200, total_tokens=410
                    ),
                )
                await track_cost_callback(
                    kwargs={
                        "stream": True,
                        "complete_streaming_response": resp,
                        "litellm_params": {
                            "metadata": {
                                "user_api_key": generated_key,
                                "user_api_key_user_id": user_id,
                            }
                        },
                        "response_cost": 0.00002,
                    },
                    completion_response=ModelResponse(),
                    start_time=datetime.now(),
                    end_time=datetime.now(),
                )
                await asyncio.sleep(5)
                # use generated key to auth in
                result = await user_api_key_auth(request=request, api_key=bearer_token)
                print("result from user auth with new key", result)
                pytest.fail("This should have failed!. They key crossed it's budget")

            asyncio.run(test())
        except Exception as e:
            error_detail = e.message
            assert "Budget has been exceeded" in error_detail
            print(vars(e))
    finally:
        # Put the global back exactly as it was found.
        if previous_max_budget is _unset:
            delattr(litellm, "max_budget")
        else:
            setattr(litellm, "max_budget", previous_max_budget)
|
|
|
|
|
|
def test_generate_and_call_with_valid_key_never_expires(prisma_client):
    """Scenario 7: a key created with ``duration=None`` must authenticate.

    A user/key pair is created through ``new_user`` acting as the proxy admin,
    then the generated key is presented to ``user_api_key_auth`` for
    ``/chat/completions``. Any exception raised along the way fails the test.
    """
    print("prisma client=", prisma_client)

    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")

    async def _create_key_and_authenticate():
        await litellm.proxy.proxy_server.prisma_client.connect()

        new_user_request = NewUserRequest(duration=None)
        admin_identity = UserAPIKeyAuth(
            user_role=LitellmUserRoles.PROXY_ADMIN,
            api_key="sk-1234",
            user_id="1234",
        )
        created = await new_user(
            data=new_user_request, user_api_key_dict=admin_identity
        )
        print(created)

        auth_header = "Bearer " + created.key

        http_request = Request(scope={"type": "http"})
        http_request._url = URL(url="/chat/completions")

        # Present the freshly generated key as the bearer token.
        auth_result = await user_api_key_auth(
            request=http_request, api_key=auth_header
        )
        print("result from user auth with new key", auth_result)

    try:
        asyncio.run(_create_key_and_authenticate())
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")
|
|
|
|
|
|
def test_generate_and_call_with_expired_key(prisma_client):
    """Scenario 8: authenticating with an expired key is expected to fail.

    A key is created with ``duration="0s"`` and then used against
    ``/chat/completions``. The auth call must raise an error whose message
    contains "Authentication Error" and whose ``type`` is
    ``ProxyErrorTypes.expired_key``.
    """
    print("prisma client=", prisma_client)

    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:

        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            request = NewUserRequest(duration="0s")
            key = await new_user(
                data=request,
                user_api_key_dict=UserAPIKeyAuth(
                    user_role=LitellmUserRoles.PROXY_ADMIN,
                    api_key="sk-1234",
                    user_id="1234",
                ),
            )
            print(key)

            generated_key = key.key
            bearer_token = "Bearer " + generated_key

            request = Request(scope={"type": "http"})
            request._url = URL(url="/chat/completions")

            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
            pytest.fail("This should have failed!. It's an expired key")

        asyncio.run(test())
    except Exception as e:
        print("Got Exception", e)
        # An unexpected error (one without ``message``/``type``) should fail the
        # assertions below with its own text rather than an AttributeError.
        error_detail = e.message if hasattr(e, "message") else str(e)
        print(error_detail)
        assert "Authentication Error" in error_detail
        assert getattr(e, "type", None) == ProxyErrorTypes.expired_key
|
|
|
|
|
|
def test_delete_key(prisma_client):
    """Scenario 9: generate a key, delete it, and check the deletion worked.

    The delete call is authenticated with the master key. After deletion the
    test asserts the response lists the deleted key and that neither the raw
    key nor its hash is present in the in-memory ``user_api_key_cache``.
    """
    print("prisma client=", prisma_client)

    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    setattr(litellm.proxy.proxy_server, "user_custom_auth", None)
    try:

        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            from litellm.proxy.proxy_server import user_api_key_cache

            request = NewUserRequest()
            key = await new_user(
                data=request,
                user_api_key_dict=UserAPIKeyAuth(
                    user_role=LitellmUserRoles.PROXY_ADMIN,
                    api_key="sk-1234",
                    user_id="1234",
                ),
            )
            print(key)

            generated_key = key.key
            delete_key_request = KeyRequest(keys=[generated_key])

            # The management call is authenticated with the master key,
            # not with the key that is about to be deleted.
            bearer_token = "Bearer sk-1234"

            request = Request(scope={"type": "http"})
            request._url = URL(url="/key/delete")

            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print(f"result: {result}")
            result.user_role = LitellmUserRoles.PROXY_ADMIN

            # delete the key
            result_delete_key = await delete_key_fn(
                data=delete_key_request, user_api_key_dict=result
            )
            print("result from delete key", result_delete_key)
            assert result_delete_key == {"deleted_keys": [generated_key]}

            # Neither the raw key nor its hash may remain cached.
            assert generated_key not in user_api_key_cache.in_memory_cache.cache_dict
            assert (
                hash_token(generated_key)
                not in user_api_key_cache.in_memory_cache.cache_dict
            )

        asyncio.run(test())
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")
|
|
|
|
|
|
def test_delete_key_auth(prisma_client):
    """Scenario 10: generate a key, delete it, then use it -> expect failure.

    The key is deleted through ``delete_key_fn`` (authenticated with the
    master key). A later ``user_api_key_auth`` call with the deleted key must
    raise an error whose message contains "Authentication Error".
    """
    print("prisma client=", prisma_client)

    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:

        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            from litellm.proxy.proxy_server import user_api_key_cache

            request = NewUserRequest()
            key = await new_user(
                data=request,
                user_api_key_dict=UserAPIKeyAuth(
                    user_role=LitellmUserRoles.PROXY_ADMIN,
                    api_key="sk-1234",
                    user_id="1234",
                ),
            )
            print(key)

            generated_key = key.key
            delete_key_request = KeyRequest(keys=[generated_key])

            # delete the key; this management call uses the master key
            bearer_token = "Bearer sk-1234"

            request = Request(scope={"type": "http"})
            request._url = URL(url="/key/delete")

            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print(f"result: {result}")
            result.user_role = LitellmUserRoles.PROXY_ADMIN

            result_delete_key = await delete_key_fn(
                data=delete_key_request, user_api_key_dict=result
            )

            print("result from delete key", result_delete_key)
            assert result_delete_key == {"deleted_keys": [generated_key]}

            request = Request(scope={"type": "http"}, receive=None)
            request._url = URL(url="/chat/completions")

            # Neither the raw key nor its hash may remain cached.
            assert generated_key not in user_api_key_cache.in_memory_cache.cache_dict
            assert (
                hash_token(generated_key)
                not in user_api_key_cache.in_memory_cache.cache_dict
            )

            # use the (now deleted) generated key to auth in
            bearer_token = "Bearer " + generated_key
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("got result", result)
            pytest.fail("This should have failed!. IT's an invalid key")

        asyncio.run(test())
    except Exception as e:
        print("Got Exception", e)
        # Fall back to str(e) so an unexpected error type is reported by the
        # assertion below instead of raising AttributeError here.
        error_detail = e.message if hasattr(e, "message") else str(e)
        print(error_detail)
        assert "Authentication Error" in error_detail
|
|
|
|
|
|
def test_generate_and_call_key_info(prisma_client):
    """Generate a key with metadata, then check what key/info reports for it.

    Asserts that ``info_key_fn`` returns the same key, that
    ``max_parallel_requests`` is unset, and that the metadata given at
    creation time is returned unchanged. The key is deleted at the end.
    """
    print("prisma client=", prisma_client)

    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:

        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            request = NewUserRequest(
                metadata={"team": "litellm-team3", "project": "litellm-project3"}
            )
            key = await new_user(
                data=request,
                user_api_key_dict=UserAPIKeyAuth(
                    user_role=LitellmUserRoles.PROXY_ADMIN,
                    api_key="sk-1234",
                    user_id="1234",
                ),
            )
            print(key)

            generated_key = key.key

            # look up the generated key through key/info as a proxy admin
            result = await info_key_fn(
                key=generated_key,
                user_api_key_dict=UserAPIKeyAuth(
                    user_role=LitellmUserRoles.PROXY_ADMIN,
                ),
            )
            print("result from info_key_fn", result)
            assert result["key"] == generated_key
            print("\n info for key=", result["info"])
            assert result["info"]["max_parallel_requests"] is None
            assert result["info"]["metadata"] == {
                "team": "litellm-team3",
                "project": "litellm-project3",
            }

            # cleanup - delete key (authenticated with the master key)
            delete_key_request = KeyRequest(keys=[generated_key])
            bearer_token = "Bearer sk-1234"

            request = Request(scope={"type": "http"})
            request._url = URL(url="/key/delete")

            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print(f"result: {result}")
            result.user_role = LitellmUserRoles.PROXY_ADMIN

            await delete_key_fn(data=delete_key_request, user_api_key_dict=result)

        asyncio.run(test())
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")
|
|
|
|
|
|
def test_generate_and_update_key(prisma_client):
    """Scenario 11: generate a key, update it, and verify key/info afterwards.

    Flow: create two teams, create a key in the first team, read key/info,
    call ``update_key_fn`` twice (models/budget, then team_id), and read
    key/info again.

    - Checks that the updated fields change.
    - Checks that untouched fields (metadata, max_parallel_requests) do not.
    """
    import uuid

    print("prisma client=", prisma_client)

    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:

        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()

            # create team "litellm-core-infra@gmail.com" (uuid suffix keeps it unique)
            print("creating team litellm-core-infra@gmail.com")
            _team_1 = "litellm-core-infra@gmail.com_{}".format(uuid.uuid4())
            await new_team(
                NewTeamRequest(
                    team_id=_team_1,
                ),
                user_api_key_dict=UserAPIKeyAuth(
                    user_role=LitellmUserRoles.PROXY_ADMIN,
                    api_key="sk-1234",
                    user_id="1234",
                ),
                http_request=Request(scope={"type": "http"}),
            )

            _team_2 = "ishaan-special-team_{}".format(uuid.uuid4())
            await new_team(
                NewTeamRequest(
                    team_id=_team_2,
                ),
                user_api_key_dict=UserAPIKeyAuth(
                    user_role=LitellmUserRoles.PROXY_ADMIN,
                    api_key="sk-1234",
                    user_id="1234",
                ),
                http_request=Request(scope={"type": "http"}),
            )

            request = NewUserRequest(
                metadata={"project": "litellm-project3"},
                team_id=_team_1,
            )

            key = await new_user(
                data=request,
                user_api_key_dict=UserAPIKeyAuth(
                    user_role=LitellmUserRoles.PROXY_ADMIN,
                    api_key="sk-1234",
                    user_id="1234",
                ),
            )
            print(key)

            generated_key = key.key

            # read the key's info before any update
            result = await info_key_fn(
                key=generated_key,
                user_api_key_dict=UserAPIKeyAuth(
                    user_role=LitellmUserRoles.PROXY_ADMIN,
                ),
            )
            print("result from info_key_fn", result)
            assert result["key"] == generated_key
            print("\n info for key=", result["info"])
            assert result["info"]["max_parallel_requests"] is None
            assert result["info"]["metadata"] == {
                "project": "litellm-project3",
            }
            assert result["info"]["team_id"] == _team_1

            request = Request(scope={"type": "http"})
            request._url = URL(url="/key/update")

            # update the key; pass the Request *instance*, not the class
            response1 = await update_key_fn(
                request=request,
                data=UpdateKeyRequest(
                    key=generated_key,
                    models=["ada", "babbage", "curie", "davinci"],
                    budget_duration="1mo",
                    max_budget=100,
                ),
            )

            print("response1=", response1)

            # update the team id
            response2 = await update_key_fn(
                request=request,
                data=UpdateKeyRequest(key=generated_key, team_id=_team_2),
            )
            print("response2=", response2)

            # get info on key after update
            result = await info_key_fn(
                key=generated_key,
                user_api_key_dict=UserAPIKeyAuth(
                    user_role=LitellmUserRoles.PROXY_ADMIN,
                ),
            )
            print("result from info_key_fn", result)
            assert result["key"] == generated_key
            print("\n info for key=", result["info"])
            assert result["info"]["max_parallel_requests"] is None
            assert result["info"]["metadata"] == {
                "project": "litellm-project3",
            }
            assert result["info"]["models"] == ["ada", "babbage", "curie", "davinci"]
            assert result["info"]["team_id"] == _team_2
            assert result["info"]["budget_duration"] == "1mo"
            assert result["info"]["max_budget"] == 100

            # budget_reset_at should be 30 days from now
            assert result["info"]["budget_reset_at"] is not None
            budget_reset_at = result["info"]["budget_reset_at"].replace(
                tzinfo=timezone.utc
            )
            current_time = datetime.now(timezone.utc)

            # allow a one-day tolerance on either side of 30 days
            assert 31 >= (budget_reset_at - current_time).days >= 29

            # cleanup - delete key (authenticated with the master key)
            delete_key_request = KeyRequest(keys=[generated_key])
            bearer_token = "Bearer sk-1234"

            request = Request(scope={"type": "http"})
            request._url = URL(url="/key/delete")

            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print(f"result: {result}")
            result.user_role = LitellmUserRoles.PROXY_ADMIN

            await delete_key_fn(data=delete_key_request, user_api_key_dict=result)

        asyncio.run(test())
    except Exception as e:
        print("Got Exception", e)
        pytest.fail(f"An exception occurred - {str(e)}\n{traceback.format_exc()}")
|
|
|
|
|
|
def test_key_generate_with_custom_auth(prisma_client):
    """Check that a custom key-generation hook can reject or allow a request.

    ``custom_generate_key_fn`` is installed as ``user_custom_key_generate`` on
    the proxy server module. A request without a team id must be rejected with
    the hook's message; a request for team "litellm-core-infra@gmail.com" must
    succeed. The previously installed hook is restored when the test ends.
    """

    # custom - generate key function
    async def custom_generate_key_fn(data: GenerateKeyRequest) -> dict:
        """
        Asynchronous function for generating a key based on the input data.

        Args:
            data (GenerateKeyRequest): The input data for key generation.

        Returns:
            dict: A dictionary containing the decision and an optional message.
            {
                "decision": False,
                "message": "This violates LiteLLM Proxy Rules. No team id provided.",
            }
        """

        # decide if a key should be generated or not
        print("using custom auth function!")
        data_json = data.json()  # type: ignore

        # only the team id takes part in the decision
        team_id = data_json.get("team_id")

        if team_id == "litellm-core-infra@gmail.com":
            # only team_id="litellm-core-infra@gmail.com" can make keys
            return {
                "decision": True,
            }

        print("Failed custom auth")
        return {
            "decision": False,
            "message": "This violates LiteLLM Proxy Rules. No team id provided.",
        }

    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    # Remember the current hook so this test does not leak its own hook.
    previous_hook = getattr(
        litellm.proxy.proxy_server, "user_custom_key_generate", None
    )
    setattr(
        litellm.proxy.proxy_server, "user_custom_key_generate", custom_generate_key_fn
    )
    try:

        async def test():
            try:
                await litellm.proxy.proxy_server.prisma_client.connect()
                request = GenerateKeyRequest()
                key = await generate_key_fn(request)
                pytest.fail(f"Expected an exception. Got {key}")
            except Exception as e:
                # this should fail
                print("Got Exception", e)
                error_detail = e.message if hasattr(e, "message") else str(e)
                print(error_detail)
                print("First request failed!. This is expected")
                assert (
                    "This violates LiteLLM Proxy Rules. No team id provided."
                    in error_detail
                )

            request_2 = GenerateKeyRequest(
                team_id="litellm-core-infra@gmail.com",
            )

            key = await generate_key_fn(request_2)
            print(key)

        asyncio.run(test())
    except Exception as e:
        print("Got Exception", e)
        print(e.message if hasattr(e, "message") else str(e))
        pytest.fail(f"An exception occurred - {str(e)}")
    finally:
        setattr(litellm.proxy.proxy_server, "user_custom_key_generate", previous_hook)
|
|
|
|
|
|
def test_call_with_key_over_budget(prisma_client):
    """Scenario 12: a key that has spent past its budget must be rejected.

    A key is generated with ``max_budget=0.00001``. A ``response_cost`` of
    0.00002 is then reported through the proxy cost callback, ``update_spend``
    is called, the written spend log is read back and checked, and the next
    auth attempt is expected to raise a budget-exceeded ``ProxyException``.
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")

    async def _spend_then_authenticate():
        await litellm.proxy.proxy_server.prisma_client.connect()
        key_request = GenerateKeyRequest(max_budget=0.00001)
        key_response = await generate_key_fn(key_request)
        print(key_response)

        api_key = key_response.key
        owner_id = key_response.user_id
        auth_header = "Bearer " + api_key

        http_request = Request(scope={"type": "http"})
        http_request._url = URL(url="/chat/completions")

        # first auth: the key has no recorded spend yet
        first_auth = await user_api_key_auth(request=http_request, api_key=auth_header)
        print("result from user auth with new key", first_auth)

        # record spend via the cost callback; the second auth should then fail
        from litellm import Choices, Message, ModelResponse, Usage
        from litellm.caching.caching import Cache
        from litellm.proxy.proxy_server import (
            _PROXY_track_cost_callback as track_cost_callback,
        )

        litellm.cache = Cache()
        import time
        import uuid

        request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"

        assistant_message = Message(
            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
            role="assistant",
        )
        only_choice = Choices(finish_reason=None, index=0, message=assistant_message)
        resp = ModelResponse(
            id=request_id,
            choices=[only_choice],
            model="gpt-35-turbo",  # azure always has model written like this
            usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
        )

        key_metadata = {
            "user_api_key": hash_token(api_key),
            "user_api_key_user_id": owner_id,
        }
        callback_kwargs = {
            "model": "chatgpt-v-2",
            "stream": False,
            "litellm_params": {"metadata": key_metadata},
            "response_cost": 0.00002,
        }
        await track_cost_callback(
            kwargs=callback_kwargs,
            completion_response=resp,
            start_time=datetime.now(),
            end_time=datetime.now(),
        )
        await update_spend(
            prisma_client=prisma_client,
            db_writer_client=None,
            proxy_logging_obj=proxy_logging_obj,
        )

        # test spend_log was written and we can read it
        spend_logs = await view_spend_logs(
            request_id=request_id,
            user_api_key_dict=UserAPIKeyAuth(api_key=api_key),
        )

        print("read spend logs", spend_logs)
        assert len(spend_logs) == 1

        logged = spend_logs[0]

        assert logged.request_id == request_id
        assert logged.spend == float("2e-05")
        assert logged.model == "chatgpt-v-2"
        assert (
            logged.cache_key
            == "c891d64397a472e6deb31b87a5ac4d3ed5b2dcc069bc87e2afe91e6d64e95a1e"
        )

        # second auth with the same key: expected to raise
        second_auth = await user_api_key_auth(
            request=http_request, api_key=auth_header
        )
        print("result from user auth with new key", second_auth)
        pytest.fail("This should have failed!. They key crossed it's budget")

    try:
        asyncio.run(_spend_then_authenticate())
    except Exception as e:
        traceback.print_exc()
        error_detail = e.message if hasattr(e, "message") else str(e)
        assert "Budget has been exceeded" in error_detail
        assert isinstance(e, ProxyException)
        assert e.type == ProxyErrorTypes.budget_exceeded
        print(vars(e))
|
|
|
|
|
|
def test_call_with_key_over_budget_no_cache(prisma_client):
    """Over-budget key is rejected even when it is not in the in-memory cache.

    Same flow as the cached over-budget test, except that the in-memory
    ``user_api_key_cache`` dict is emptied and ``proxy_batch_write_at`` is set
    to 1 before spend is recorded, and the test sleeps 10 seconds before
    calling ``update_spend``.

    Related to this: https://github.com/BerriAI/litellm/issues/3920
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")

    async def _run_scenario():
        await litellm.proxy.proxy_server.prisma_client.connect()
        budget_request = GenerateKeyRequest(max_budget=0.00001)
        issued = await generate_key_fn(budget_request)
        print(issued)

        raw_key = issued.key
        key_owner = issued.user_id
        token = "Bearer " + raw_key

        chat_request = Request(scope={"type": "http"})
        chat_request._url = URL(url="/chat/completions")

        # initial auth with the generated key
        initial_auth = await user_api_key_auth(request=chat_request, api_key=token)
        print("result from user auth with new key", initial_auth)

        # record spend via the cost callback; the second auth should then fail
        from litellm.proxy.proxy_server import (
            _PROXY_track_cost_callback as track_cost_callback,
        )
        from litellm.proxy.proxy_server import user_api_key_cache

        # drop every cached key so the spend path cannot rely on memory
        user_api_key_cache.in_memory_cache.cache_dict = {}
        setattr(litellm.proxy.proxy_server, "proxy_batch_write_at", 1)

        from litellm import Choices, Message, ModelResponse, Usage
        from litellm.caching.caching import Cache

        litellm.cache = Cache()
        import time
        import uuid

        request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"

        reply = Message(
            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
            role="assistant",
        )
        resp = ModelResponse(
            id=request_id,
            choices=[Choices(finish_reason=None, index=0, message=reply)],
            model="gpt-35-turbo",  # azure always has model written like this
            usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
        )

        cost_kwargs = {
            "model": "chatgpt-v-2",
            "stream": False,
            "litellm_params": {
                "metadata": {
                    "user_api_key": hash_token(raw_key),
                    "user_api_key_user_id": key_owner,
                }
            },
            "response_cost": 0.00002,
        }
        await track_cost_callback(
            kwargs=cost_kwargs,
            completion_response=resp,
            start_time=datetime.now(),
            end_time=datetime.now(),
        )
        await asyncio.sleep(10)
        await update_spend(
            prisma_client=prisma_client,
            db_writer_client=None,
            proxy_logging_obj=proxy_logging_obj,
        )

        # test spend_log was written and we can read it
        spend_logs = await view_spend_logs(
            request_id=request_id,
            user_api_key_dict=UserAPIKeyAuth(api_key=raw_key),
        )

        print("read spend logs", spend_logs)
        assert len(spend_logs) == 1

        entry = spend_logs[0]

        assert entry.request_id == request_id
        assert entry.spend == float("2e-05")
        assert entry.model == "chatgpt-v-2"
        assert (
            entry.cache_key
            == "c891d64397a472e6deb31b87a5ac4d3ed5b2dcc069bc87e2afe91e6d64e95a1e"
        )

        # second auth with the same key: expected to raise
        final_auth = await user_api_key_auth(request=chat_request, api_key=token)
        print("result from user auth with new key", final_auth)
        pytest.fail(f"This should have failed!. They key crossed it's budget")

    try:
        asyncio.run(_run_scenario())
    except Exception as e:
        traceback.print_exc()
        error_detail = e.message if hasattr(e, "message") else str(e)
        assert "Budget has been exceeded" in error_detail
        assert isinstance(e, ProxyException)
        assert e.type == ProxyErrorTypes.budget_exceeded
        print(vars(e))
|
|
|
|
|
|
def test_call_with_key_over_model_budget(prisma_client):
    """A key over its per-model budget must be rejected for that model.

    The key has ``max_budget=1000`` overall but a ``model_max_budget`` of
    0.000001 for "chatgpt-v-2". After a ``response_cost`` of 0.00002 is
    recorded for that model, an auth call whose request body names
    "chatgpt-v-2" is expected to raise a budget-exceeded ``ProxyException``.
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:

        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()

            # set budget for chatgpt-v-2 to 0.000001, expect the next request to fail
            request = GenerateKeyRequest(
                max_budget=1000,
                model_max_budget={
                    "chatgpt-v-2": 0.000001,
                },
                metadata={"user_api_key": 0.0001},
            )
            key = await generate_key_fn(request)
            print(key)

            generated_key = key.key
            user_id = key.user_id
            bearer_token = "Bearer " + generated_key

            request = Request(scope={"type": "http"})
            request._url = URL(url="/chat/completions")

            # Stub the request body so auth sees which model is being called.
            async def return_body():
                return b'{"model": "chatgpt-v-2"}'

            request.body = return_body

            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)

            # update spend using track_cost callback, make 2nd request, it should fail
            from litellm import Choices, Message, ModelResponse, Usage
            from litellm.caching.caching import Cache
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )

            litellm.cache = Cache()
            import uuid

            request_id = f"chatcmpl-{uuid.uuid4()}"

            resp = ModelResponse(
                id=request_id,
                choices=[
                    Choices(
                        finish_reason=None,
                        index=0,
                        message=Message(
                            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                            role="assistant",
                        ),
                    )
                ],
                model="gpt-35-turbo",  # azure always has model written like this
                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
            )
            await track_cost_callback(
                kwargs={
                    "model": "chatgpt-v-2",
                    "stream": False,
                    "litellm_params": {
                        "metadata": {
                            "user_api_key": hash_token(generated_key),
                            "user_api_key_user_id": user_id,
                        }
                    },
                    "response_cost": 0.00002,
                },
                completion_response=resp,
                start_time=datetime.now(),
                end_time=datetime.now(),
            )
            await update_spend(
                prisma_client=prisma_client,
                db_writer_client=None,
                proxy_logging_obj=proxy_logging_obj,
            )
            # test spend_log was written and we can read it
            spend_logs = await view_spend_logs(
                request_id=request_id,
                user_api_key_dict=UserAPIKeyAuth(api_key=generated_key),
            )

            print("read spend logs", spend_logs)
            assert len(spend_logs) == 1

            spend_log = spend_logs[0]

            assert spend_log.request_id == request_id
            assert spend_log.spend == float("2e-05")
            assert spend_log.model == "chatgpt-v-2"
            assert (
                spend_log.cache_key
                == "c891d64397a472e6deb31b87a5ac4d3ed5b2dcc069bc87e2afe91e6d64e95a1e"
            )

            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
            pytest.fail("This should have failed!. They key crossed it's budget")

        asyncio.run(test())
    except Exception as e:
        traceback.print_exc()
        # Same fallback as the other over-budget tests: an error without
        # ``message`` is reported by the assertions, not by an AttributeError.
        if hasattr(e, "message"):
            error_detail = e.message
        else:
            error_detail = str(e)
        assert "Budget has been exceeded!" in error_detail
        assert isinstance(e, ProxyException)
        assert e.type == ProxyErrorTypes.budget_exceeded
        print(vars(e))
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_call_with_key_never_over_budget(prisma_client):
    """A key with ``max_budget=None`` must keep authenticating after spend.

    A ``response_cost`` of 200000 is recorded for the key, ``update_spend`` is
    called, and the key is used to authenticate again. Any exception fails
    the test.
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:
        await litellm.proxy.proxy_server.prisma_client.connect()
        request = GenerateKeyRequest(max_budget=None)
        key = await generate_key_fn(request)
        print(key)

        generated_key = key.key
        user_id = key.user_id
        bearer_token = "Bearer " + generated_key

        request = Request(scope={"type": "http"})
        request._url = URL(url="/chat/completions")

        # use generated key to auth in
        result = await user_api_key_auth(request=request, api_key=bearer_token)
        print(f"result from user auth with new key: {result}")

        # update spend using track_cost callback, make 2nd request, it should still pass
        import uuid

        from litellm import Choices, Message, ModelResponse, Usage
        from litellm.proxy.proxy_server import (
            _PROXY_track_cost_callback as track_cost_callback,
        )

        request_id = f"chatcmpl-{uuid.uuid4()}"

        resp = ModelResponse(
            id=request_id,
            choices=[
                Choices(
                    finish_reason=None,
                    index=0,
                    message=Message(
                        content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                        role="assistant",
                    ),
                )
            ],
            model="gpt-35-turbo",  # azure always has model written like this
            usage=Usage(
                prompt_tokens=210000, completion_tokens=200000, total_tokens=41000
            ),
        )
        await track_cost_callback(
            kwargs={
                "model": "chatgpt-v-2",
                "stream": False,
                "litellm_params": {
                    "metadata": {
                        "user_api_key": hash_token(generated_key),
                        "user_api_key_user_id": user_id,
                    }
                },
                "response_cost": 200000,
            },
            completion_response=resp,
            start_time=datetime.now(),
            end_time=datetime.now(),
        )
        await update_spend(
            prisma_client=prisma_client,
            db_writer_client=None,
            proxy_logging_obj=proxy_logging_obj,
        )
        # use generated key to auth in
        result = await user_api_key_auth(request=request, api_key=bearer_token)
        print("result from user auth with new key", result)
    except Exception as e:
        pytest.fail(f"This should have not failed!. They key uses max_budget=None. {e}")
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_call_with_key_over_budget_stream(prisma_client):
    """Scenario 14: an over-budget key is rejected after a streamed response.

    A key with ``max_budget=0.00001`` has a ``response_cost`` of 0.00005
    recorded through the cost callback with ``stream=True`` and a
    ``complete_streaming_response``. After ``update_spend`` the next auth
    attempt must raise an error containing "Budget has been exceeded".
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    import logging

    from litellm._logging import verbose_proxy_logger

    litellm.set_verbose = True
    verbose_proxy_logger.setLevel(logging.DEBUG)
    try:
        await litellm.proxy.proxy_server.prisma_client.connect()
        request = GenerateKeyRequest(max_budget=0.00001)
        key = await generate_key_fn(request)
        print(key)

        generated_key = key.key
        user_id = key.user_id
        bearer_token = "Bearer " + generated_key
        print(f"generated_key: {generated_key}")
        request = Request(scope={"type": "http"})
        request._url = URL(url="/chat/completions")

        # use generated key to auth in
        result = await user_api_key_auth(request=request, api_key=bearer_token)
        print("result from user auth with new key", result)

        # update spend using track_cost callback, make 2nd request, it should fail
        import uuid

        from litellm import Choices, Message, ModelResponse, Usage
        from litellm.proxy.proxy_server import (
            _PROXY_track_cost_callback as track_cost_callback,
        )

        request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
        resp = ModelResponse(
            id=request_id,
            choices=[
                Choices(
                    finish_reason=None,
                    index=0,
                    message=Message(
                        content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                        role="assistant",
                    ),
                )
            ],
            model="gpt-35-turbo",  # azure always has model written like this
            usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
        )
        await track_cost_callback(
            kwargs={
                "call_type": "acompletion",
                "model": "sagemaker-chatgpt-v-2",
                "stream": True,
                "complete_streaming_response": resp,
                "litellm_params": {
                    "metadata": {
                        "user_api_key": hash_token(generated_key),
                        "user_api_key_user_id": user_id,
                    }
                },
                "response_cost": 0.00005,
            },
            completion_response=resp,
            start_time=datetime.now(),
            end_time=datetime.now(),
        )
        await update_spend(
            prisma_client=prisma_client,
            db_writer_client=None,
            proxy_logging_obj=proxy_logging_obj,
        )
        # use generated key to auth in
        result = await user_api_key_auth(request=request, api_key=bearer_token)
        print("result from user auth with new key", result)
        pytest.fail("This should have failed!. They key crossed it's budget")

    except Exception as e:
        print("Got Exception", e)
        # Fall back to str(e) so an unexpected error is reported by the
        # assertion below instead of raising AttributeError here.
        error_detail = e.message if hasattr(e, "message") else str(e)
        assert "Budget has been exceeded" in error_detail

        print(vars(e))
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_view_spend_per_user(prisma_client):
    """``spend_user_fn(user_id=None)`` returns a non-empty list of users.

    The first entry returned must have a positive ``"spend"`` value.
    NOTE(review): this presumably relies on spend rows written by earlier
    tests in the same database - confirm against the fixture setup.
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    await litellm.proxy.proxy_server.prisma_client.connect()
    try:
        user_by_spend = await spend_user_fn(user_id=None)
        assert isinstance(user_by_spend, list)
        assert len(user_by_spend) > 0
        first_user = user_by_spend[0]

        print("\nfirst_user=", first_user)
        assert first_user["spend"] > 0
    except Exception as e:
        print("Got Exception", e)
        pytest.fail(f"Got exception {e}")
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_view_spend_per_key(prisma_client):
    """``spend_key_fn()`` returns a non-empty list of keys.

    The first entry returned must have a positive ``spend`` attribute.
    NOTE(review): this presumably relies on spend rows written by earlier
    tests in the same database - confirm against the fixture setup.
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    await litellm.proxy.proxy_server.prisma_client.connect()
    try:
        key_by_spend = await spend_key_fn()
        assert isinstance(key_by_spend, list)
        assert len(key_by_spend) > 0
        first_key = key_by_spend[0]

        print("\nfirst_key=", first_key)
        assert first_key.spend > 0
    except Exception as e:
        print("Got Exception", e)
        pytest.fail(f"Got exception {e}")
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_key_name_null(prisma_client):
    """
    - set DISABLE_KEY_NAME=True in the environment
    - create key
    - get key info
    - assert key_name is null

    The environment variable is put back to whatever it was before the test
    (removed again if it was not set), even when the test fails.
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    # Remember the prior state so the finally-block restores it exactly,
    # instead of unconditionally leaving "False" behind.
    previous_disable_key_name = os.environ.get("DISABLE_KEY_NAME")
    os.environ["DISABLE_KEY_NAME"] = "True"
    try:
        # connect() lives inside the try so a connection error cannot leak
        # DISABLE_KEY_NAME=True into the following tests.
        await litellm.proxy.proxy_server.prisma_client.connect()
        request = GenerateKeyRequest()
        key = await generate_key_fn(request)
        print("generated key=", key)
        generated_key = key.key
        result = await info_key_fn(
            key=generated_key,
            user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
        )
        print("result from info_key_fn", result)
        assert result["info"]["key_name"] is None
    except Exception as e:
        print("Got Exception", e)
        pytest.fail(f"Got exception {e}")
    finally:
        if previous_disable_key_name is None:
            os.environ.pop("DISABLE_KEY_NAME", None)
        else:
            os.environ["DISABLE_KEY_NAME"] = previous_disable_key_name
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_key_name_set(prisma_client):
    """
    Generate a key with a default request, read it back through
    ``info_key_fn`` as a proxy admin, and verify that ``key_name`` in the
    returned info is a string.
    """
    proxy_server = litellm.proxy.proxy_server
    setattr(proxy_server, "prisma_client", prisma_client)
    setattr(proxy_server, "master_key", "sk-1234")
    setattr(proxy_server, "general_settings", {"allow_user_auth": True})
    await proxy_server.prisma_client.connect()
    try:
        new_key = await generate_key_fn(GenerateKeyRequest())
        admin = UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN)
        result = await info_key_fn(key=new_key.key, user_api_key_dict=admin)
        print("result from info_key_fn", result)
        key_name = result["info"]["key_name"]
        assert isinstance(key_name, str)
    except Exception as e:
        print("Got Exception", e)
        pytest.fail(f"Got exception {e}")
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_default_key_params(prisma_client):
    """
    - set litellm.default_key_generate_params = {"max_budget": 0.000122}
    - create key without specifying a budget
    - get key info
    - assert the key's max_budget equals the configured default

    The previous value of ``litellm.default_key_generate_params`` is restored
    afterwards so the default budget does not leak into other tests.
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    setattr(litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": True})
    previous_default_params = getattr(litellm, "default_key_generate_params", None)
    litellm.default_key_generate_params = {"max_budget": 0.000122}
    try:
        await litellm.proxy.proxy_server.prisma_client.connect()
        request = GenerateKeyRequest()
        key = await generate_key_fn(request)
        generated_key = key.key
        result = await info_key_fn(
            key=generated_key,
            user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
        )
        print("result from info_key_fn", result)
        assert result["info"]["max_budget"] == 0.000122
    except Exception as e:
        print("Got Exception", e)
        pytest.fail(f"Got exception {e}")
    finally:
        # Undo the module-level override regardless of the test outcome.
        litellm.default_key_generate_params = previous_default_params
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_upperbound_key_param_larger_budget(prisma_client):
    """
    - set upperbound key params (max_budget=0.001, budget_duration="1m")
    - request a key whose max_budget / budget_duration exceed those bounds
    - assert key generation is rejected with code "400"
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    litellm.upperbound_key_generate_params = LiteLLM_UpperboundKeyGenerateParams(
        max_budget=0.001, budget_duration="1m"
    )
    await litellm.proxy.proxy_server.prisma_client.connect()
    try:
        request = GenerateKeyRequest(
            max_budget=200000,
            budget_duration="30d",
        )
        await generate_key_fn(request)
        # Without this the test silently passed when no error was raised.
        # pytest.fail raises an outcome exception that `except Exception`
        # below does not intercept, mirroring the larger_duration test.
        pytest.fail("Expected this to fail but it passed")
    except Exception as e:
        assert e.code == str(400)
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_upperbound_key_param_larger_duration(prisma_client):
    """
    With upperbound key params of max_budget=100 / duration="14d", asking for
    a key with duration="30d" must be rejected with code "400".
    """
    proxy_server = litellm.proxy.proxy_server
    setattr(proxy_server, "prisma_client", prisma_client)
    setattr(proxy_server, "master_key", "sk-1234")
    upper_bounds = LiteLLM_UpperboundKeyGenerateParams(max_budget=100, duration="14d")
    litellm.upperbound_key_generate_params = upper_bounds
    await proxy_server.prisma_client.connect()
    try:
        too_long_request = GenerateKeyRequest(max_budget=10, duration="30d")
        await generate_key_fn(too_long_request)
        pytest.fail("Expected this to fail but it passed")
    except Exception as e:
        expected_code = str(400)
        assert e.code == expected_code
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_upperbound_key_param_none_duration(prisma_client):
    """
    With upperbound key params of max_budget=100 / duration="14d", a key
    requested without any budget or duration is expected to come back with
    max_budget == 100 and an expiry date 14 days from today.
    """
    from datetime import datetime, timedelta

    proxy_server = litellm.proxy.proxy_server
    setattr(proxy_server, "prisma_client", prisma_client)
    setattr(proxy_server, "master_key", "sk-1234")
    upper_bounds = LiteLLM_UpperboundKeyGenerateParams(max_budget=100, duration="14d")
    litellm.upperbound_key_generate_params = upper_bounds
    await proxy_server.prisma_client.connect()
    try:
        key = await generate_key_fn(GenerateKeyRequest())

        print(key)

        assert key.max_budget == 100
        assert key.expires is not None

        # Compare calendar dates only, not exact timestamps.
        expiry_date = key.expires.date()
        two_weeks_ahead = (datetime.now() + timedelta(days=14)).date()

        assert expiry_date == two_weeks_ahead
    except Exception as e:
        pytest.fail(f"Got exception {e}")
|
|
|
|
|
|
def test_get_bearer_token():
    """
    Check ``_get_bearer_token`` against a table of header values.

    Expectations encoded below: a value starting with ``"Bearer "`` yields
    the remainder as the token; an empty value, a value without the prefix,
    and a value with leading whitespace before the prefix all yield ``""``.
    """
    from litellm.proxy.auth.user_api_key_auth import _get_bearer_token

    cases = [
        # (description, api_key, expected token)
        ("valid Bearer token", "Bearer valid_token", "valid_token"),
        ("empty API key", "", ""),
        ("API key without Bearer prefix", "invalid_token", ""),
        ("Bearer prefix with surrounding spaces", " Bearer valid_token ", ""),
        ("Bearer prefix with an sk- style key", "Bearer sk-1234", "sk-1234"),
    ]
    for description, api_key, expected in cases:
        result = _get_bearer_token(api_key)
        # The failure message reports the real expected value for every case.
        assert result == expected, f"Expected '{expected}', got '{result}'"
|
|
|
|
|
|
def test_update_logs_with_spend_logs_url(prisma_client):
    """
    Unit test: ``prisma_client.spend_log_transactions`` gets populated by
    ``_set_spend_logs_payload`` both when no spend_logs_url is given and when
    one (here an empty string) is passed in.
    """
    from litellm.proxy.proxy_server import _set_spend_logs_payload

    # 1) no url supplied
    _set_spend_logs_payload(
        payload={"startTime": datetime.now(), "endTime": datetime.now()},
        prisma_client=prisma_client,
    )
    assert len(prisma_client.spend_log_transactions) > 0

    # start the second scenario from an empty transaction list
    prisma_client.spend_log_transactions = []

    # 2) url supplied
    _set_spend_logs_payload(
        payload={"startTime": datetime.now(), "endTime": datetime.now()},
        spend_logs_url="",
        prisma_client=prisma_client,
    )
    assert len(prisma_client.spend_log_transactions) > 0
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_user_api_key_auth(prisma_client):
    """
    Each of these api_key values must make ``user_api_key_auth`` raise a
    ProxyException with an exact message:

    - None                -> no api key passed in
    - "my_token"          -> malformed key (missing 'Bearer ' prefix)
    - "" (empty string)   -> malformed key
    """
    from litellm.proxy.proxy_server import ProxyException

    proxy_server = litellm.proxy.proxy_server
    setattr(proxy_server, "prisma_client", prisma_client)
    setattr(proxy_server, "master_key", "sk-1234")
    setattr(proxy_server, "general_settings", {"allow_user_auth": True})
    await proxy_server.prisma_client.connect()

    request = Request(scope={"type": "http"})
    request._url = URL(url="/chat/completions")

    rejected_keys = [
        (None, "Authentication Error, No api key passed in."),
        (
            "my_token",
            "Authentication Error, Malformed API Key passed in. Ensure Key has `Bearer ` prefix. Passed in: my_token",
        ),
        (
            "",
            "Authentication Error, Malformed API Key passed in. Ensure Key has `Bearer ` prefix. Passed in: ",
        ),
    ]
    for bad_key, expected_message in rejected_keys:
        try:
            await user_api_key_auth(request, api_key=bad_key)
            pytest.fail("This should have failed!. IT's an invalid key")
        except ProxyException as exc:
            print(exc.message)
            assert exc.message == expected_message
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_user_api_key_auth_without_master_key(prisma_client):
    """
    With ``master_key`` set to None every auth call is expected to go
    through: None, a bare token, an empty string and a Bearer token must all
    be accepted without raising.
    """
    try:
        from litellm.proxy.proxy_server import ProxyException

        proxy_server = litellm.proxy.proxy_server
        setattr(proxy_server, "prisma_client", prisma_client)
        setattr(proxy_server, "master_key", None)
        setattr(proxy_server, "general_settings", {"allow_user_auth": True})
        await proxy_server.prisma_client.connect()

        request = Request(scope={"type": "http"})
        request._url = URL(url="/chat/completions")

        for candidate_key in (None, "my_token", "", "Bearer " + "1234"):
            await user_api_key_auth(request, api_key=candidate_key)
    except Exception as e:
        print("Got Exception", e)
        pytest.fail(f"Got exception {e}")
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_key_with_no_permissions(prisma_client):
    """
    - create a key via generate_key_helper_fn (empty models list, zero budget)
      while allow_user_auth is False
    - use it to authenticate a /chat/completions request
    - the auth call is expected to raise; the exception is only printed
    """
    proxy_server = litellm.proxy.proxy_server
    setattr(proxy_server, "prisma_client", prisma_client)
    setattr(proxy_server, "master_key", "sk-1234")
    setattr(proxy_server, "general_settings", {"allow_user_auth": False})
    await proxy_server.prisma_client.connect()
    try:
        response = await generate_key_helper_fn(  # type: ignore
            request_type="key",
            duration="1hr",
            key_max_budget=0,
            models=[],
            aliases={},
            config={},
            spend=0,
            user_id="ishaan",
            team_id="litellm-dashboard",
        )

        print(response)
        bearer_token = "Bearer " + response["token"]

        # make a /chat/completions call -> it should fail
        request = Request(scope={"type": "http"})
        request._url = URL(url="/chat/completions")

        result = await user_api_key_auth(request=request, api_key=bearer_token)
        print("result from user auth with new key", result)
        pytest.fail("This should have failed!. IT's an invalid key")
    except Exception as e:
        print("Got Exception", e)
        print(e.message)
|
|
|
|
|
|
async def track_cost_callback_helper_fn(generated_key: str, user_id: str):
    """
    Send one synthetic streamed-completion event to the proxy's cost-tracking
    callback, attributed to ``generated_key`` (hashed) and ``user_id``.

    A fresh request id is generated on every call and the response cost is
    fixed at 0.00005.
    """
    import uuid

    from litellm import Choices, Message, ModelResponse, Usage
    from litellm.proxy.proxy_server import (
        _PROXY_track_cost_callback as track_cost_callback,
    )

    request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}"
    assistant_reply = Message(
        content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
        role="assistant",
    )
    resp = ModelResponse(
        id=request_id,
        choices=[Choices(finish_reason=None, index=0, message=assistant_reply)],
        model="gpt-35-turbo",  # azure always has model written like this
        usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
    )

    # Metadata that attributes the spend to the key and user.
    key_metadata = {
        "user_api_key": hash_token(generated_key),
        "user_api_key_user_id": user_id,
    }
    callback_kwargs = {
        "call_type": "acompletion",
        "model": "sagemaker-chatgpt-v-2",
        "stream": True,
        "complete_streaming_response": resp,
        "litellm_params": {"metadata": key_metadata},
        "response_cost": 0.00005,
    }
    await track_cost_callback(
        kwargs=callback_kwargs,
        completion_response=resp,
        start_time=datetime.now(),
        end_time=datetime.now(),
    )
|
|
|
|
|
|
@pytest.mark.skip(reason="High traffic load test for spend tracking")
@pytest.mark.asyncio
async def test_proxy_load_test_db(prisma_client):
    """
    Load test (skipped by default): issue ``n`` concurrent
    track_cost_callback calls for one freshly generated key, wait, then
    compare the number of spend logs for that key with ``n``.
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    import logging
    import time

    from litellm._logging import verbose_proxy_logger

    litellm.set_verbose = True
    verbose_proxy_logger.setLevel(logging.DEBUG)
    try:
        start_time = time.time()
        await litellm.proxy.proxy_server.prisma_client.connect()
        key = await generate_key_fn(GenerateKeyRequest(max_budget=0.00001))
        print(key)

        generated_key = key.key
        user_id = key.user_id

        request = Request(scope={"type": "http"})
        request._url = URL(url="/chat/completions")

        # use generated key to auth in
        result = await user_api_key_auth(
            request=request, api_key="Bearer " + generated_key
        )
        print("result from user auth with new key", result)

        # flood the cost-tracking callback, then give the writes time to land
        n = 5000
        pending_calls = [
            track_cost_callback_helper_fn(generated_key=generated_key, user_id=user_id)
            for _ in range(n)
        ]
        completions = await asyncio.gather(*pending_calls)
        await asyncio.sleep(120)
        try:
            # call spend logs
            spend_logs = await view_spend_logs(
                api_key=generated_key,
                user_api_key_dict=UserAPIKeyAuth(api_key=generated_key),
            )

            print(f"len responses: {len(spend_logs)}")
            assert len(spend_logs) == n
            print(n, time.time() - start_time, len(spend_logs))
        except Exception:
            print(n, time.time() - start_time, 0)
        # NOTE(review): placed at the outer-try level so the run always ends
        # by surfacing the key through pytest.fail - confirm against the
        # original indentation.
        raise Exception(f"it worked! key={key.key}")
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_master_key_hashing(prisma_client):
    """
    Create a team and a user as proxy admin, then authenticate with the
    master key and verify that the ``api_key`` on the auth result is the
    hashed master key.
    """
    try:
        import uuid

        print("prisma client=", prisma_client)

        master_key = "sk-1234"

        setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
        setattr(litellm.proxy.proxy_server, "master_key", master_key)

        await litellm.proxy.proxy_server.prisma_client.connect()
        from litellm.proxy.proxy_server import user_api_key_cache

        def _admin_auth():
            # a fresh admin identity object per endpoint call
            return UserAPIKeyAuth(
                user_role=LitellmUserRoles.PROXY_ADMIN,
                api_key="sk-1234",
                user_id="1234",
            )

        _team_id = "ishaans-special-team_{}".format(uuid.uuid4())
        user_api_key_dict = _admin_auth()
        await new_team(
            NewTeamRequest(team_id=_team_id),
            user_api_key_dict=_admin_auth(),
            http_request=Request(scope={"type": "http"}),
        )

        user_request = NewUserRequest(
            models=["azure-gpt-3.5"],
            team_id=_team_id,
            tpm_limit=20,
        )
        _response = await new_user(
            data=user_request, user_api_key_dict=user_api_key_dict
        )
        print(_response)
        assert _response.models == ["azure-gpt-3.5"]
        assert _response.team_id == _team_id
        assert _response.tpm_limit == 20

        request = Request(scope={"type": "http"})
        request._url = URL(url="/chat/completions")

        # authenticate with the master key itself
        result: UserAPIKeyAuth = await user_api_key_auth(
            request=request, api_key="Bearer " + master_key
        )

        assert result.api_key == hash_token(master_key)

    except Exception as e:
        print("Got Exception", e)
        pytest.fail(f"Got exception {e}")
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_reset_spend_authentication(prisma_client):
    """
    1. Test master key can access this route -> ONLY MASTER KEY SHOULD BE ABLE TO RESET SPEND
    2. Test that non-master key gets rejected
    3. Test that non-master key with role == LitellmUserRoles.PROXY_ADMIN or admin gets rejected
    """

    print("prisma client=", prisma_client)

    master_key = "sk-1234"

    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", master_key)

    await litellm.proxy.proxy_server.prisma_client.connect()

    bearer_token = "Bearer " + master_key

    request = Request(scope={"type": "http"})
    request._url = URL(url="/global/spend/reset")

    # Test 1 - Master Key
    result: UserAPIKeyAuth = await user_api_key_auth(
        request=request, api_key=bearer_token
    )

    print("result from user auth with Master key", result)
    assert result.token is not None

    # Test 2 - Non-Master Key
    # Test 3 - Non-Master Key with role == LitellmUserRoles.PROXY_ADMIN or admin
    non_master_users = [
        NewUserRequest(tpm_limit=20),
        NewUserRequest(user_role=LitellmUserRoles.PROXY_ADMIN, tpm_limit=20),
    ]
    for user_request in non_master_users:
        _response = await new_user(data=user_request)
        generated_key = "Bearer " + _response.key

        try:
            await user_api_key_auth(request=request, api_key=generated_key)
            # The previous message claimed the key was expired, which is not
            # the condition being tested here.
            pytest.fail(
                "This should have failed! Only the master key may access /global/spend/reset"
            )
        except Exception as e:
            print("Got Exception", e)
            assert (
                "Tried to access route=/global/spend/reset, which is only for MASTER KEY"
                in e.message
            )
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_create_update_team(prisma_client):
    """
    - Create a team with max_budget, budget_duration, tpm_limit, rpm_limit
    - Assert the create response echoes those values

    - Update the same fields on the team
    - Assert the update response has the new values and that budget_reset_at
      lies 2 days ahead (within 10 seconds)

    - Call team_info and assert it reports the updated values
    """
    print("prisma client=", prisma_client)

    master_key = "sk-1234"

    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", master_key)
    import datetime

    await litellm.proxy.proxy_server.prisma_client.connect()
    from litellm.proxy.proxy_server import user_api_key_cache

    def _admin_auth():
        # fresh admin identity for each endpoint call
        return UserAPIKeyAuth(
            user_role=LitellmUserRoles.PROXY_ADMIN,
            api_key="sk-1234",
            user_id="1234",
        )

    def _assert_team_fields(fields, expected_limit, expected_duration):
        # budget, tpm and rpm share one expected number in every scenario here
        assert fields["team_id"] == _team_id
        assert fields["max_budget"] == expected_limit
        assert fields["tpm_limit"] == expected_limit
        assert fields["rpm_limit"] == expected_limit
        assert fields["budget_duration"] == expected_duration
        assert fields["budget_reset_at"] is not None and isinstance(
            fields["budget_reset_at"], datetime.datetime
        )

    _team_id = "test-team_{}".format(uuid.uuid4())
    response = await new_team(
        NewTeamRequest(
            team_id=_team_id,
            max_budget=20,
            budget_duration="30d",
            tpm_limit=20,
            rpm_limit=20,
        ),
        http_request=Request(scope={"type": "http"}),
        user_api_key_dict=_admin_auth(),
    )

    print("RESPONSE from new_team", response)
    _assert_team_fields(response, 20, "30d")

    # updating team budget duration and reset at
    response = await update_team(
        UpdateTeamRequest(
            team_id=_team_id,
            max_budget=30,
            budget_duration="2d",
            tpm_limit=30,
            rpm_limit=30,
        ),
        http_request=Request(scope={"type": "http"}),
        user_api_key_dict=_admin_auth(),
    )

    print("RESPONSE from update_team", response)
    _updated_info = dict(response["data"])
    _assert_team_fields(_updated_info, 30, "2d")

    # budget_reset_at should be 2 days from now
    budget_reset_at = _updated_info["budget_reset_at"].replace(tzinfo=timezone.utc)
    current_time = datetime.datetime.now(timezone.utc)
    two_days_in_seconds = 2 * 24 * 60 * 60
    seconds_until_reset = (budget_reset_at - current_time).total_seconds()
    assert abs(seconds_until_reset - two_days_in_seconds) <= 10

    # now hit team_info
    try:
        response = await team_info(
            team_id=_team_id,
            http_request=Request(scope={"type": "http"}),
            user_api_key_dict=_admin_auth(),
        )
    except Exception as e:
        print(e)
        pytest.fail("Receives error - {}".format(e))

    _team_info = dict(response["team_info"])
    _assert_team_fields(_team_info, 30, "2d")
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_enforced_params(prisma_client):
    """
    With ``general_settings["enforced_params"]`` requiring ``user``,
    ``metadata`` and ``metadata.generation_name``, /chat/completions auth must
    reject:

    - Case 1: a body without ``user``
    - Case 2: a body whose ``metadata`` lacks ``generation_name``

    ``enforced_params`` is removed from ``general_settings`` again in a
    ``finally`` block, so a failing assertion cannot leak it into other tests.
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    from litellm.proxy.proxy_server import general_settings

    general_settings["enforced_params"] = [
        "user",
        "metadata",
        "metadata.generation_name",
    ]

    def _body_returning(raw_body):
        # Build an async stand-in for Request.body that yields raw_body.
        async def _body():
            return raw_body

        return _body

    try:
        await litellm.proxy.proxy_server.prisma_client.connect()
        key = await new_user(
            data=NewUserRequest(),
            user_api_key_dict=UserAPIKeyAuth(
                user_role=LitellmUserRoles.PROXY_ADMIN,
                api_key="sk-1234",
                user_id="1234",
            ),
        )
        print(key)

        bearer_token = "Bearer " + key.key

        request = Request(scope={"type": "http"})
        request._url = URL(url="/chat/completions")

        rejected_bodies = [
            # Case 1: Missing user
            (
                b'{"model": "gemini-pro-vision"}',
                "BadRequest please pass param=user in request body. This is a required param",
            ),
            # Case 2: Missing metadata["generation_name"]
            (
                b'{"model": "gemini-pro-vision", "user": "1234", "metadata": {}}',
                "Authentication Error, BadRequest please pass param=[metadata][generation_name] in request body",
            ),
        ]
        for raw_body, expected_error in rejected_bodies:
            request.body = _body_returning(raw_body)
            try:
                await user_api_key_auth(request=request, api_key=bearer_token)
                pytest.fail("This should have failed!. IT's an invalid request")
            except Exception as e:
                assert expected_error in e.message
    finally:
        # Always undo the global settings change, even on failure.
        general_settings.pop("enforced_params", None)
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_update_user_role(prisma_client):
    """
    Tests that after a user role update, stale values are not served from cache

    -> create a user with role == INTERNAL_USER
    -> access an Admin only route (any error is printed and ignored)
    -> update user role to == PROXY_ADMIN
    -> access the Admin only route again -> must not raise
    """
    proxy_server = litellm.proxy.proxy_server
    setattr(proxy_server, "prisma_client", prisma_client)
    setattr(proxy_server, "master_key", "sk-1234")
    await proxy_server.prisma_client.connect()

    internal_user = NewUserRequest(user_role=LitellmUserRoles.INTERNAL_USER)
    key = await new_user(data=internal_user)

    print(key)
    api_key = "Bearer " + key.key

    api_route = APIRoute(path="/global/spend", endpoint=global_spend)
    scope = {
        "type": "http",
        "route": api_route,
        "path": "/global/spend",
        "headers": [("Authorization", api_key)],
    }
    request = Request(scope)
    request._url = URL(url="/global/spend")

    # first attempt, while the user is still INTERNAL_USER
    try:
        result = await user_api_key_auth(request=request, api_key=api_key)
        print("result from user auth with new key", result)
    except Exception as e:
        print(e)

    promotion = UpdateUserRequest(
        user_id=key.user_id, user_role=LitellmUserRoles.PROXY_ADMIN
    )
    await user_update(data=promotion)

    # second attempt, now as PROXY_ADMIN
    print("\n\nMAKING NEW REQUEST WITH UPDATED USER ROLE\n\n")
    result = await user_api_key_auth(request=request, api_key=api_key)
    print("result from user auth with new key", result)
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_update_user_unit_test(prisma_client):
    """
    Unit test for /user/update

    Creates a user, updates team, budget, rate limits and metadata through
    UpdateUserRequest, and checks that every field is reflected in the
    returned data and that budget_reset_at lies 10 days ahead (within 10s).
    """
    proxy_server = litellm.proxy.proxy_server
    setattr(proxy_server, "prisma_client", prisma_client)
    setattr(proxy_server, "master_key", "sk-1234")
    await proxy_server.prisma_client.connect()

    key = await new_user(data=NewUserRequest(user_email="test@test.com"))

    print(key)

    update_request = UpdateUserRequest(
        user_id=key.user_id,
        team_id="1234",
        max_budget=100,
        budget_duration="10d",
        tpm_limit=100,
        rpm_limit=100,
        metadata={"very-new-metadata": "something"},
    )
    user_info = await user_update(data=update_request)

    print("user_info", user_info)
    assert user_info is not None
    _user_info = user_info["data"].model_dump()

    expected_fields = {
        "user_id": key.user_id,
        "team_id": "1234",
        "max_budget": 100,
        "budget_duration": "10d",
        "tpm_limit": 100,
        "rpm_limit": 100,
        "metadata": {"very-new-metadata": "something"},
    }
    for field_name, expected_value in expected_fields.items():
        assert _user_info[field_name] == expected_value

    # budget reset at should be 10 days from now
    budget_reset_at = _user_info["budget_reset_at"].replace(tzinfo=timezone.utc)
    current_time = datetime.now(timezone.utc)
    ten_days_in_seconds = 10 * 24 * 60 * 60
    seconds_until_reset = (budget_reset_at - current_time).total_seconds()
    assert abs(seconds_until_reset - ten_days_in_seconds) <= 10
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_custom_api_key_header_name(prisma_client):
    """
    With general_settings["litellm_key_header_name"] == "x-litellm-key":

    - a request carrying the master key in that header authenticates, even
      though the ``api_key`` argument is an invalid key
    - a request without that header is rejected, even though the ``api_key``
      argument is the master key
    """
    proxy_server = litellm.proxy.proxy_server
    setattr(proxy_server, "prisma_client", prisma_client)
    setattr(proxy_server, "master_key", "sk-1234")
    setattr(
        proxy_server,
        "general_settings",
        {"litellm_key_header_name": "x-litellm-key"},
    )
    await proxy_server.prisma_client.connect()

    api_route = APIRoute(path="/chat/completions", endpoint=chat_completion)

    def _request_with_headers(headers):
        # minimal ASGI scope for the /chat/completions route
        return Request(
            {
                "type": "http",
                "route": api_route,
                "path": api_route.path,
                "headers": headers,
            }
        )

    # passes: the master key travels in the configured custom header
    request = _request_with_headers([(b"x-litellm-key", b"Bearer sk-1234")])
    result = await user_api_key_auth(request=request, api_key="Bearer invalid-key")

    # fails: the custom header is absent
    request = _request_with_headers([])
    try:
        result = await user_api_key_auth(request=request, api_key="Bearer sk-1234")
        pytest.fail("This should have failed!. invalid Auth on this request")
    except Exception as e:
        print("failed with error", e)
        assert (
            "No LiteLLM Virtual Key pass. Please set header=x-litellm-key: Bearer <api_key>"
            in e.message
        )
|
|
|
|
# this should pass because X-Litellm-Key is valid
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_generate_key_with_model_tpm_limit(prisma_client):
    """
    Generate a key whose metadata carries per-model tpm/rpm limits, confirm
    they are stored, then update both limits through update_key_fn and
    confirm the key metadata reflects the new values.
    """
    print("prisma client=", prisma_client)

    proxy_server = litellm.proxy.proxy_server
    setattr(proxy_server, "prisma_client", prisma_client)
    setattr(proxy_server, "master_key", "sk-1234")
    await proxy_server.prisma_client.connect()

    async def _key_info():
        # read the key back as proxy admin and sanity-check its identity
        info = await info_key_fn(
            key=generated_key,
            user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
        )
        print("result from info_key_fn", info)
        assert info["key"] == generated_key
        print("\n info for key=", info["info"])
        return info

    request = GenerateKeyRequest(
        metadata={
            "team": "litellm-team3",
            "model_tpm_limit": {"gpt-4": 100},
            "model_rpm_limit": {"gpt-4": 2},
        }
    )
    key = await generate_key_fn(
        data=request,
        user_api_key_dict=UserAPIKeyAuth(
            user_role=LitellmUserRoles.PROXY_ADMIN,
            api_key="sk-1234",
            user_id="1234",
        ),
    )
    print(key)

    generated_key = key.key

    # limits as originally generated
    result = await _key_info()
    assert result["info"]["metadata"] == {
        "team": "litellm-team3",
        "model_tpm_limit": {"gpt-4": 100},
        "model_rpm_limit": {"gpt-4": 2},
    }

    # Update model tpm_limit and rpm_limit
    request = UpdateKeyRequest(
        key=generated_key,
        model_tpm_limit={"gpt-4": 200},
        model_rpm_limit={"gpt-4": 3},
    )
    _request = Request(scope={"type": "http"})
    _request._url = URL(url="/update/key")

    await update_key_fn(data=request, request=_request)

    # limits after the update
    result = await _key_info()
    assert result["info"]["metadata"] == {
        "team": "litellm-team3",
        "model_tpm_limit": {"gpt-4": 200},
        "model_rpm_limit": {"gpt-4": 3},
    }
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_generate_key_with_guardrails(prisma_client):
    """
    Generate a key with one guardrail, confirm it appears in the key
    metadata, then update the key's guardrails through update_key_fn and
    confirm the metadata lists both guardrails in the order given.
    """
    print("prisma client=", prisma_client)

    proxy_server = litellm.proxy.proxy_server
    setattr(proxy_server, "prisma_client", prisma_client)
    setattr(proxy_server, "master_key", "sk-1234")
    await proxy_server.prisma_client.connect()

    async def _key_info():
        # read the key back as proxy admin and sanity-check its identity
        info = await info_key_fn(
            key=generated_key,
            user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
        )
        print("result from info_key_fn", info)
        assert info["key"] == generated_key
        print("\n info for key=", info["info"])
        return info

    request = GenerateKeyRequest(
        guardrails=["aporia-pre-call"],
        metadata={
            "team": "litellm-team3",
        },
    )
    key = await generate_key_fn(
        data=request,
        user_api_key_dict=UserAPIKeyAuth(
            user_role=LitellmUserRoles.PROXY_ADMIN,
            api_key="sk-1234",
            user_id="1234",
        ),
    )
    print("generated key=", key)

    generated_key = key.key

    # guardrails as originally generated
    result = await _key_info()
    assert result["info"]["metadata"] == {
        "team": "litellm-team3",
        "guardrails": ["aporia-pre-call"],
    }

    # Update the key's guardrails
    request = UpdateKeyRequest(
        key=generated_key,
        guardrails=["aporia-pre-call", "aporia-post-call"],
    )
    _request = Request(scope={"type": "http"})
    _request._url = URL(url="/update/key")

    await update_key_fn(data=request, request=_request)

    # guardrails after the update
    result = await _key_info()
    assert result["info"]["metadata"] == {
        "team": "litellm-team3",
        "guardrails": ["aporia-pre-call", "aporia-post-call"],
    }
|
|
|
|
|
|
@pytest.mark.asyncio()
@pytest.mark.flaky(retries=6, delay=1)
async def test_team_access_groups(prisma_client):
    """
    Test team based model access groups

    - Test calling a model in the access group -> pass
    - Test calling a model not in the access group -> fail
    """
    litellm.set_verbose = True
    server = litellm.proxy.proxy_server
    server.prisma_client = prisma_client
    server.master_key = "sk-1234"
    await server.prisma_client.connect()

    # router whose two deployments both belong to the "beta-models" access group
    litellm_router = litellm.Router(
        model_list=[
            {
                "model_name": "gemini-pro-vision",
                "litellm_params": {
                    "model": "vertex_ai/gemini-1.0-pro-vision-001",
                },
                "model_info": {"access_groups": ["beta-models"]},
            },
            {
                "model_name": "gpt-4o",
                "litellm_params": {
                    "model": "gpt-4o",
                },
                "model_info": {"access_groups": ["beta-models"]},
            },
        ]
    )
    server.llm_router = litellm_router

    # team that is only allowed to use the access group
    new_team_response = await new_team(
        data=NewTeamRequest(
            team_alias="testing-team",
            models=["beta-models"],
        ),
        user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
        http_request=Request(scope={"type": "http"}),
    )
    print("new_team_response", new_team_response)
    created_team_id = new_team_response["team_id"]

    # key that belongs to the team created above
    key = await generate_key_fn(
        data=GenerateKeyRequest(team_id=created_team_id),
        user_api_key_dict=UserAPIKeyAuth(
            user_role=LitellmUserRoles.PROXY_ADMIN,
            api_key="sk-1234",
            user_id="1234",
        ),
    )
    print(key)

    generated_key = key.key
    bearer_token = "Bearer " + generated_key

    http_request = Request(scope={"type": "http"})
    http_request._url = URL(url="/chat/completions")

    def _body_for(model_name):
        # async stand-in for `request.body()` returning a JSON payload as bytes
        async def _read_body():
            return f'{{"model": "{model_name}"}}'.encode()

        return _read_body

    # models inside the access group - auth is expected to succeed
    for allowed_model in ("gpt-4o", "gemini-pro-vision"):
        http_request.body = _body_for(allowed_model)
        print(
            "Bearer token being sent to user_api_key_auth() - {}".format(bearer_token)
        )
        await user_api_key_auth(request=http_request, api_key=bearer_token)

    # models outside the access group - auth is expected to be rejected
    for blocked_model in ("gpt-4", "gpt-4o-mini", "gemini-experimental"):
        http_request.body = _body_for(blocked_model)
        print(
            "Bearer token being sent to user_api_key_auth() - {}".format(bearer_token)
        )
        try:
            await user_api_key_auth(request=http_request, api_key=bearer_token)
            pytest.fail("This should have failed!. IT's an invalid model")
        except Exception as e:
            print("got exception", e)
            error_message = e.message
            assert "not allowed to call model" in error_message
            assert "Allowed team models" in error_message
|
|
|
|
|
|
@pytest.mark.asyncio()
async def test_team_tags(prisma_client):
    """
    - Test setting tags on a team
    - Assert this is returned when calling /team/info
    - Team/update with tags should update the tags
    - Assert new tags are returned when calling /team/info
    """
    litellm.set_verbose = True
    server = litellm.proxy.proxy_server
    server.prisma_client = prisma_client
    server.master_key = "sk-1234"
    await server.prisma_client.connect()

    def _admin():
        # fresh admin auth object for every endpoint call
        return UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN)

    def _http_request():
        return Request(scope={"type": "http"})

    async def _team_tags(team_id):
        # call /team/info and return the tags stored in the team metadata
        info = await team_info(
            team_id=team_id,
            user_api_key_dict=_admin(),
            http_request=_http_request(),
        )
        print("team_info_response", info)
        return info["team_info"].metadata["tags"]

    # create the team with an initial tag
    new_team_response = await new_team(
        data=NewTeamRequest(
            team_alias="test-teamA",
            tags=["teamA"],
        ),
        user_api_key_dict=_admin(),
        http_request=_http_request(),
    )
    print("new_team_response", new_team_response)
    created_team_id = new_team_response["team_id"]

    assert await _team_tags(created_team_id) == ["teamA"]

    # team update with tags
    team_update_response = await update_team(
        data=UpdateTeamRequest(
            team_id=created_team_id,
            tags=["teamA", "teamB"],
        ),
        user_api_key_dict=_admin(),
        http_request=_http_request(),
    )
    print("team_update_response", team_update_response)

    # call /team/info again
    assert await _team_tags(created_team_id) == ["teamA", "teamB"]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_admin_only_routes(prisma_client):
    """
    Tests if setting admin_only_routes works

    only an admin should be able to access admin only routes

    The proxy `general_settings` are replaced for the duration of the test and
    restored afterwards, even when an assertion fails.
    """
    from litellm.proxy import proxy_server

    litellm.set_verbose = True
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    await litellm.proxy.proxy_server.prisma_client.connect()
    general_settings = {
        "allowed_routes": ["/embeddings", "/key/generate"],
        "admin_only_routes": ["/key/generate"],
    }

    initial_general_settings = getattr(proxy_server, "general_settings")
    setattr(proxy_server, "general_settings", general_settings)

    try:
        admin_user = await new_user(
            data=NewUserRequest(
                user_name="admin",
                user_role=LitellmUserRoles.PROXY_ADMIN,
            ),
            user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
        )

        non_admin_user = await new_user(
            data=NewUserRequest(
                user_name="non-admin",
                user_role=LitellmUserRoles.INTERNAL_USER,
            ),
            user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
        )

        admin_user_key = admin_user.key
        non_admin_user_key = non_admin_user.key

        assert admin_user_key is not None
        assert non_admin_user_key is not None

        request = Request(scope={"type": "http"})
        request._url = URL(url="/key/generate")

        # admin key on an admin only route - this should pass
        await user_api_key_auth(
            request=request,
            api_key="Bearer " + admin_user_key,
        )

        # assert non-admin can not access admin routes
        try:
            await user_api_key_auth(
                request=request,
                api_key="Bearer " + non_admin_user_key,
            )
            pytest.fail(
                "Expected this call to fail. Non-admin user called an admin only route."
            )
        except Exception as e:
            error_str = str(e.message)
            print("error str=", error_str)
            assert "Route" in error_str and "admin only route" in error_str
    finally:
        # restore even on failure so the restricted routes do not leak into
        # tests that run afterwards
        setattr(proxy_server, "general_settings", initial_general_settings)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_list_keys(prisma_client):
    """
    Test the list_keys function:
    - Test basic key
    - Test pagination
    - Test filtering by user_id, and key_alias
    """
    from litellm.proxy.proxy_server import hash_token

    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    await litellm.proxy.proxy_server.prisma_client.connect()

    # Test basic listing
    request = Request(scope={"type": "http", "query_string": b""})
    response = await list_keys(
        request,
        UserAPIKeyAuth(),
        page=1,
        size=10,
    )
    print("response=", response)
    assert "keys" in response
    assert len(response["keys"]) > 0
    assert "total_count" in response
    assert "current_page" in response
    assert "total_pages" in response

    # Test pagination
    response = await list_keys(request, UserAPIKeyAuth(), page=1, size=2)
    print("pagination response=", response)
    assert len(response["keys"]) == 2
    assert response["current_page"] == 1

    # Create a user (and its key) with a unique user_id / key_alias to filter on
    unique_id = str(uuid.uuid4())
    key_alias = f"key-list-alias-{unique_id}"
    user_id = f"key-list-user-{unique_id}"
    response = await new_user(
        data=NewUserRequest(
            user_id=user_id,
            user_role=LitellmUserRoles.INTERNAL_USER,
            key_alias=key_alias,
        ),
        user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
    )

    # list_keys returns hashed tokens, so compare against the hash of the new key
    _key = hash_token(response.key)

    # presumably gives the new key time to be written before listing - TODO confirm
    await asyncio.sleep(2)

    # Test filtering by user_id
    response = await list_keys(
        request, UserAPIKeyAuth(), user_id=user_id, page=1, size=10
    )
    print("filtered user_id response=", response)
    assert len(response["keys"]) == 1
    assert _key in response["keys"]

    # Test filtering by key_alias
    response = await list_keys(
        request, UserAPIKeyAuth(), key_alias=key_alias, page=1, size=10
    )
    assert len(response["keys"]) == 1
    assert _key in response["keys"]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_key_list_unsupported_params(prisma_client):
    """
    Test the list_keys function:
    - Test unsupported params

    A request whose query string carries a parameter list_keys does not accept
    (`alias=foo`) must be rejected with an "Unsupported parameter" error.
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    await litellm.proxy.proxy_server.prisma_client.connect()

    request = Request(scope={"type": "http", "query_string": b"alias=foo"})

    try:
        await list_keys(request, UserAPIKeyAuth(), page=1, size=10)
        pytest.fail("Expected this call to fail")
    except Exception as e:
        error_str = str(e.message)
        print("error str=", error_str)
        assert "Unsupported parameter" in error_str
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_auth_vertex_ai_route(prisma_client):
    """
    If user is premium user and vertex-ai route is used. Assert Virtual Key checks are run

    A key that differs from the master key is sent to a `/vertex-ai/...` route;
    the call is expected to be rejected with code "401".
    """
    litellm.set_verbose = True
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    # NOTE(review): premium_user is not reset by this test, so it stays True for
    # whatever runs next in the same process - confirm that is intended.
    setattr(litellm.proxy.proxy_server, "premium_user", True)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    await litellm.proxy.proxy_server.prisma_client.connect()

    route = "/vertex-ai/publishers/google/models/gemini-1.5-flash-001:generateContent"
    request = Request(scope={"type": "http"})
    request._url = URL(url=route)
    request._headers = {"Authorization": "Bearer sk-12345"}
    try:
        await user_api_key_auth(request=request, api_key="Bearer " + "sk-12345")
        pytest.fail("Expected this call to fail. An invalid virtual key was passed.")
    except Exception as e:
        print(vars(e))
        error_str = str(e.message)
        print("error str=", error_str)
        assert e.code == "401"
        assert "Invalid proxy server token passed" in error_str
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_service_accounts(prisma_client):
    """
    Do not delete
    this is the Admin UI flow

    With `service_account_settings.enforced_params = ["user"]`, a key whose
    metadata carries a `service_account_id`:
    - is rejected on /chat/completions when the body has no `user`
    - is accepted on /chat/completions when the body has a `user`

    `general_settings` is reset to `{}` when the test ends, pass or fail.
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    setattr(
        litellm.proxy.proxy_server,
        "general_settings",
        {"service_account_settings": {"enforced_params": ["user"]}},
    )

    try:
        await litellm.proxy.proxy_server.prisma_client.connect()

        # generate a key that is tagged as a service account
        request = GenerateKeyRequest(
            metadata={"service_account_id": f"prod-service-{uuid.uuid4()}"},
        )
        response = await generate_key_fn(
            data=request,
        )

        print("key generated=", response)
        generated_key = response.key
        bearer_token = "Bearer " + generated_key

        # make a bad /chat/completions call expect it to fail
        request = Request(scope={"type": "http"})
        request._url = URL(url="/chat/completions")

        async def return_body():
            return b'{"model": "gemini-pro-vision"}'

        request.body = return_body

        # use generated key to auth in
        print(
            "Bearer token being sent to user_api_key_auth() - {}".format(bearer_token)
        )
        try:
            await user_api_key_auth(request=request, api_key=bearer_token)
            pytest.fail("Expected this call to fail. Bad request using service account")
        except Exception as e:
            print("error str=", str(e.message))
            assert "This is a required param for service account" in str(e.message)

        # make a good /chat/completions call it should pass
        async def good_return_body():
            return b'{"model": "gemini-pro-vision", "user": "foo"}'

        request.body = good_return_body

        result = await user_api_key_auth(request=request, api_key=bearer_token)
        print("response from user_api_key_auth", result)
    finally:
        # reset even on failure so the enforced params do not leak into later tests
        setattr(litellm.proxy.proxy_server, "general_settings", {})
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_user_api_key_auth_db_unavailable():
    """
    Test that user_api_key_auth handles DB connection failures appropriately when:
    1. DB connection fails during token validation
    2. allow_requests_on_db_unavailable=True

    The proxy globals replaced by mocks here (`prisma_client`,
    `user_api_key_cache`, `general_settings`) are put back when the test ends.
    """
    litellm.set_verbose = True

    # Mock dependencies
    class MockPrismaClient:
        """Prisma stand-in whose reads always fail with a connection error."""

        async def get_data(self, *args, **kwargs):
            print("MockPrismaClient.get_data() called")
            raise httpx.ConnectError("Failed to connect to DB")

        async def connect(self):
            print("MockPrismaClient.connect() called")

    class MockDualCache:
        """Cache stand-in that never holds anything, so every lookup is a miss."""

        async def async_get_cache(self, *args, **kwargs):
            return None

        async def async_set_cache(self, *args, **kwargs):
            pass

        async def set_cache(self, *args, **kwargs):
            pass

    server = litellm.proxy.proxy_server
    # remember what is about to be overridden so it can be restored afterwards
    saved_state = {
        name: getattr(server, name, None)
        for name in ("prisma_client", "user_api_key_cache", "general_settings")
    }

    try:
        # Set up test environment
        setattr(server, "prisma_client", MockPrismaClient())
        setattr(server, "user_api_key_cache", MockDualCache())
        setattr(server, "master_key", "sk-1234")
        setattr(
            server,
            "general_settings",
            {"allow_requests_on_db_unavailable": True},
        )

        # Create test request
        request = Request(scope={"type": "http"})
        request._url = URL(url="/chat/completions")

        # Run test with a sample API key
        result = await user_api_key_auth(
            request=request,
            api_key="Bearer sk-123456789",
        )

        # Verify results
        assert isinstance(result, UserAPIKeyAuth)
        assert result.key_name == "failed-to-connect-to-db"
        assert result.user_id == litellm.proxy.proxy_server.litellm_proxy_admin_name
    finally:
        # do not leave the failing DB mock / empty cache behind for later tests
        for name, value in saved_state.items():
            setattr(server, name, value)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_user_api_key_auth_db_unavailable_not_allowed():
    """
    Test that user_api_key_auth raises an exception when:
    This is default behavior

    1. DB connection fails during token validation
    2. allow_requests_on_db_unavailable=False (default behavior)

    The proxy globals replaced by mocks here (`prisma_client`,
    `user_api_key_cache`, `general_settings`) are put back when the test ends.
    """

    # Mock dependencies
    class MockPrismaClient:
        """Prisma stand-in whose reads always fail with a connection error."""

        async def get_data(self, *args, **kwargs):
            print("MockPrismaClient.get_data() called")
            raise httpx.ConnectError("Failed to connect to DB")

        async def connect(self):
            print("MockPrismaClient.connect() called")

    class MockDualCache:
        """Cache stand-in that never holds anything, so every lookup is a miss."""

        async def async_get_cache(self, *args, **kwargs):
            return None

        async def async_set_cache(self, *args, **kwargs):
            pass

        async def set_cache(self, *args, **kwargs):
            pass

    server = litellm.proxy.proxy_server
    # remember what is about to be overridden so it can be restored afterwards
    saved_state = {
        name: getattr(server, name, None)
        for name in ("prisma_client", "user_api_key_cache", "general_settings")
    }

    try:
        # Set up test environment
        setattr(server, "prisma_client", MockPrismaClient())
        setattr(server, "user_api_key_cache", MockDualCache())
        # empty settings -> allow_requests_on_db_unavailable is not set
        setattr(server, "general_settings", {})
        setattr(server, "master_key", "sk-1234")

        # Create test request
        request = Request(scope={"type": "http"})
        request._url = URL(url="/chat/completions")

        # Run test with a sample API key
        with pytest.raises(litellm.proxy._types.ProxyException):
            await user_api_key_auth(
                request=request,
                api_key="Bearer sk-123456789",
            )
    finally:
        # do not leave the failing DB mock / empty cache behind for later tests
        for name, value in saved_state.items():
            setattr(server, name, value)
|
|
|
|
|
|
## E2E Virtual Key + Secret Manager Tests #########################################
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_key_generate_with_secret_manager_call(prisma_client):
    """
    Generate a key
    assert it exists in the secret manager

    delete the key
    assert it is deleted from the secret manager

    The litellm-level secret manager globals set up here are restored and the
    proxy `general_settings` reset to `{}` when the test ends, pass or fail.
    """
    from litellm.secret_managers.aws_secret_manager_v2 import AWSSecretsManagerV2
    from litellm.proxy._types import KeyManagementSystem, KeyManagementSettings

    litellm.set_verbose = True

    # remember the litellm globals this test overrides
    initial_secret_manager_client = litellm.secret_manager_client
    initial_key_management_system = litellm._key_management_system
    initial_key_management_settings = litellm._key_management_settings

    try:
        #### Test Setup ############################################################
        aws_secret_manager_client = AWSSecretsManagerV2()
        litellm.secret_manager_client = aws_secret_manager_client
        litellm._key_management_system = KeyManagementSystem.AWS_SECRET_MANAGER
        litellm._key_management_settings = KeyManagementSettings(
            store_virtual_keys=True,
        )
        general_settings = {
            "key_management_system": "aws_secret_manager",
            "key_management_settings": {
                "store_virtual_keys": True,
            },
        }

        setattr(litellm.proxy.proxy_server, "general_settings", general_settings)
        setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
        setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
        await litellm.proxy.proxy_server.prisma_client.connect()
        ############################################################################

        # generate new key
        key_alias = f"test_alias_secret_manager_key-{uuid.uuid4()}"
        spend = 100
        max_budget = 400
        models = ["fake-openai-endpoint"]
        new_key = await generate_key_fn(
            data=GenerateKeyRequest(
                key_alias=key_alias, spend=spend, max_budget=max_budget, models=models
            ),
            user_api_key_dict=UserAPIKeyAuth(
                user_role=LitellmUserRoles.PROXY_ADMIN,
                api_key="sk-1234",
                user_id="1234",
            ),
        )

        generated_key = new_key.key
        print(generated_key)

        # the secret is looked up under "<configured prefix>/<key alias>"
        secret_name = f"{litellm._key_management_settings.prefix_for_stored_virtual_keys}/{key_alias}"

        # presumably gives the secret manager write time to complete - TODO confirm
        await asyncio.sleep(2)

        # read from the secret manager
        result = await aws_secret_manager_client.async_read_secret(
            secret_name=secret_name
        )

        # Assert the correct key is stored in the secret manager
        print("response from AWS Secret Manager")
        print(result)
        assert result == generated_key

        # delete the key
        await delete_key_fn(
            data=KeyRequest(keys=[generated_key]),
            user_api_key_dict=UserAPIKeyAuth(
                user_role=LitellmUserRoles.PROXY_ADMIN, api_key="sk-1234", user_id="1234"
            ),
        )

        await asyncio.sleep(2)

        # Assert the key is deleted from the secret manager
        result = await aws_secret_manager_client.async_read_secret(
            secret_name=secret_name
        )
        assert result is None
    finally:
        # cleanup - runs on failure too, so later tests do not keep storing
        # their virtual keys in the secret manager
        setattr(litellm.proxy.proxy_server, "general_settings", {})
        litellm.secret_manager_client = initial_secret_manager_client
        litellm._key_management_system = initial_key_management_system
        litellm._key_management_settings = initial_key_management_settings
|
|
|
|
|
|
################################################################################
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_key_alias_uniqueness(prisma_client):
    """
    Test that:
    1. We cannot create two keys with the same alias
    2. We cannot update a key to use an alias that's already taken
    3. We can update a key while keeping its existing alias
    """
    server = litellm.proxy.proxy_server
    server.prisma_client = prisma_client
    server.master_key = "sk-1234"
    await server.prisma_client.connect()

    duplicate_alias_error = "Unique key aliases across all keys are required"

    async def _create_key(alias):
        # generate a key with the given alias as proxy admin
        return await generate_key_fn(
            data=GenerateKeyRequest(key_alias=alias),
            user_api_key_dict=UserAPIKeyAuth(
                user_role=LitellmUserRoles.PROXY_ADMIN,
                api_key="sk-1234",
                user_id="1234",
            ),
        )

    async def _set_alias(token, alias):
        # update the alias of an existing key
        return await update_key_fn(
            data=UpdateKeyRequest(key=token, key_alias=alias),
            request=Request(scope={"type": "http"}),
        )

    try:
        # 1. a second key with an alias that is already in use is rejected
        unique_alias = f"test-alias-{uuid.uuid4()}"
        key1 = await _create_key(unique_alias)

        try:
            await _create_key(unique_alias)
            pytest.fail("Should not be able to create a second key with the same alias")
        except Exception as e:
            print("vars(e)=", vars(e))
            assert duplicate_alias_error in str(e.message)

        # 2. an existing key cannot be moved onto an alias owned by another key
        another_alias = f"test-alias-{uuid.uuid4()}"
        key3 = await _create_key(another_alias)

        try:
            await _set_alias(key3.key, unique_alias)
            pytest.fail("Should not be able to update a key to use an existing alias")
        except Exception as e:
            assert duplicate_alias_error in str(e.message)

        # 3. re-submitting a key's own alias is allowed
        updated_key = await _set_alias(key1.key, unique_alias)
        assert updated_key is not None

    except Exception as e:
        print("got exceptions, e=", e)
        print("vars(e)=", vars(e))
        pytest.fail(f"An unexpected error occurred: {str(e)}")
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_enforce_unique_key_alias(prisma_client):
    """
    Unit test the _enforce_unique_key_alias function:
    1. Test it allows unique aliases
    2. Test it blocks duplicate aliases for new keys
    3. Test it allows updating a key with its own existing alias
    4. Test it blocks updating a key with another key's alias

    `existing_key_token` is always passed as the hashed token, in both the
    allowed (3) and the blocked (4) update case.
    """
    from litellm.proxy.management_endpoints.key_management_endpoints import (
        _enforce_unique_key_alias,
    )

    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    await litellm.proxy.proxy_server.prisma_client.connect()

    try:
        # Test 1: Allow unique alias
        unique_alias = f"test-alias-{uuid.uuid4()}"
        await _enforce_unique_key_alias(
            key_alias=unique_alias,
            prisma_client=prisma_client,
        )  # Should pass

        # Create a key with this alias in the database
        key1 = await generate_key_fn(
            data=GenerateKeyRequest(key_alias=unique_alias),
            user_api_key_dict=UserAPIKeyAuth(
                user_role=LitellmUserRoles.PROXY_ADMIN,
                api_key="sk-1234",
                user_id="1234",
            ),
        )

        # Test 2: Block duplicate alias for new key
        try:
            await _enforce_unique_key_alias(
                key_alias=unique_alias,
                prisma_client=prisma_client,
            )
            pytest.fail("Should not allow duplicate alias")
        except Exception as e:
            assert "Unique key aliases across all keys are required" in str(e.message)

        # Test 3: Allow updating key with its own alias
        await _enforce_unique_key_alias(
            key_alias=unique_alias,
            existing_key_token=hash_token(key1.key),
            prisma_client=prisma_client,
        )  # Should pass

        # Test 4: Block updating with another key's alias
        another_key = await generate_key_fn(
            data=GenerateKeyRequest(key_alias=f"test-alias-{uuid.uuid4()}"),
            user_api_key_dict=UserAPIKeyAuth(
                user_role=LitellmUserRoles.PROXY_ADMIN,
                api_key="sk-1234",
                user_id="1234",
            ),
        )

        try:
            await _enforce_unique_key_alias(
                key_alias=unique_alias,
                # hashed like in Test 3, so the only difference between the
                # allowed and the blocked case is which key owns the token
                existing_key_token=hash_token(another_key.key),
                prisma_client=prisma_client,
            )
            pytest.fail("Should not allow using another key's alias")
        except Exception as e:
            assert "Unique key aliases across all keys are required" in str(e.message)

    except Exception as e:
        print("Unexpected error:", e)
        pytest.fail(f"An unexpected error occurred: {str(e)}")
|