litellm-mirror/litellm/tests/test_key_generate_prisma.py
2024-02-06 15:25:51 -08:00

1528 lines
56 KiB
Python

# Test the following scenarios:
# 1. Generate a Key, and use it to make a call
# 2. Make a call with invalid key, expect it to fail
# 3. Make a call to a key with invalid model - expect to fail
# 4. Make a call to a key with valid model - expect to pass
# 5. Make a call with user over budget, expect to fail
# 6. Make a streaming chat/completions call with user over budget, expect to fail
# 7. Make a call with an key that never expires, expect to pass
# 8. Make a call with an expired key, expect to fail
# 9. Delete a Key
# 10. Generate a key, call key/info. Assert info returned is the same as generated key info
# 11. Generate a Key, call key/info, call key/update, call key/info
# 12. Make a call with key over budget, expect to fail
# 14. Make a streaming chat/completions call with key over budget, expect to fail
# 15. Generate key, when `allow_user_auth`=False - check if `/key/info` returns key_name=null
# 16. Generate key, when `allow_user_auth`=True - check if `/key/info` returns key_name=sk...<last-4-digits>
# function to call to generate key - async def new_user(data: NewUserRequest):
# function to validate a request - async def user_auth(request: Request):
import sys, os
import traceback
from dotenv import load_dotenv
from fastapi import Request
from datetime import datetime
load_dotenv()
import os, io, time
# this file is to test litellm/proxy
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest, logging, asyncio
import litellm, asyncio
from litellm.proxy.proxy_server import (
new_user,
generate_key_fn,
user_api_key_auth,
user_update,
delete_key_fn,
info_key_fn,
update_key_fn,
generate_key_fn,
generate_key_helper_fn,
spend_user_fn,
spend_key_fn,
view_spend_logs,
user_info,
)
from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token
from litellm._logging import verbose_proxy_logger
verbose_proxy_logger.setLevel(level=logging.DEBUG)
from litellm.proxy._types import (
NewUserRequest,
GenerateKeyRequest,
DynamoDBArgs,
DeleteKeyRequest,
UpdateKeyRequest,
GenerateKeyRequest,
)
from litellm.proxy.utils import DBClient
from starlette.datastructures import URL
from litellm.caching import DualCache
# Shared ProxyLogging instance (in-memory DualCache) reused by the prisma_client fixture.
proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())
# Default /chat/completions payload available to tests that need a request body.
request_data = {
    "model": "azure-gpt-3.5",
    "messages": [
        {"role": "user", "content": "this is my new test. respond in 50 lines"}
    ],
}
@pytest.fixture
def prisma_client():
    """Return a fresh PrismaClient bound to DATABASE_URL, resetting proxy-server globals."""
    # Instantiate the Prisma-backed DB client the proxy server will use.
    prisma_client = PrismaClient(
        database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj
    )
    # Clear any custom (non-Prisma) DB client left over from another test.
    litellm.proxy.proxy_server.custom_db_client = None
    # Unique per-run budget name so global-proxy-budget tests don't collide
    # with spend recorded by earlier runs.
    litellm.proxy.proxy_server.litellm_proxy_budget_name = (
        f"litellm-proxy-budget-{time.time()}"
    )
    # Remove any custom key-generation hook installed by a previous test.
    litellm.proxy.proxy_server.user_custom_key_generate = None
    return prisma_client
def test_generate_and_call_with_valid_key(prisma_client):
    """
    Scenario 1: generate a key, then verify user info, the key cache, the DB
    row, and that the key authenticates a /chat/completions request.
    """
    # 1. Generate a Key, and use it to make a call
    print("prisma client=", prisma_client)
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            from litellm.proxy.proxy_server import user_api_key_cache
            request = NewUserRequest(user_role="app_owner")
            key = await new_user(request)
            print(key)
            user_id = key.user_id
            # check /user/info to verify user_role was set correctly
            new_user_info = await user_info(user_id=user_id)
            new_user_info = new_user_info["user_info"]
            print("new_user_info=", new_user_info)
            assert new_user_info.user_role == "app_owner"
            assert new_user_info.user_id == user_id
            generated_key = key.key
            bearer_token = "Bearer " + generated_key
            # cache must index by the hashed token, never the raw key
            assert generated_key not in user_api_key_cache.in_memory_cache.cache_dict
            assert (
                hash_token(generated_key)
                in user_api_key_cache.in_memory_cache.cache_dict
            )
            cached_value = user_api_key_cache.in_memory_cache.cache_dict[
                hash_token(generated_key)
            ]
            print("cached value=", cached_value)
            print("cached token", cached_value.token)
            # the cached token must match the row persisted in the DB
            value_from_prisma = valid_token = await prisma_client.get_data(
                token=generated_key,
            )
            print("token from prisma", value_from_prisma)
            assert value_from_prisma.token == cached_value.token
            request = Request(scope={"type": "http"})
            request._url = URL(url="/chat/completions")
            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
        asyncio.run(test())
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")
def test_call_with_invalid_key(prisma_client):
    """
    Scenario 2: auth with an unknown key must fail.

    `user_api_key_auth` is expected to raise; the raised exception's message
    must contain "Authentication Error".
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            generated_key = "bad-key"
            bearer_token = "Bearer " + generated_key
            request = Request(scope={"type": "http"}, receive=None)
            request._url = URL(url="/chat/completions")
            # use generated key to auth in - this must raise
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("got result", result)
            pytest.fail(f"This should have failed!. IT's an invalid key")
        asyncio.run(test())
    except Exception as e:
        print("Got Exception", e)
        # Don't assume the exception exposes `.message` (litellm's ProxyException
        # does, a plain Exception doesn't) - a bare `e.message` access here would
        # raise AttributeError and hide the real failure reason.
        error_detail = e.message if hasattr(e, "message") else str(e)
        print(error_detail)
        assert "Authentication Error" in error_detail
def test_call_with_invalid_model(prisma_client):
    """
    Scenario 3: a key restricted to models=["mistral"] must be rejected when
    it requests "gemini-pro-vision".
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            request = NewUserRequest(models=["mistral"])
            key = await new_user(request)
            print(key)
            generated_key = key.key
            bearer_token = "Bearer " + generated_key
            request = Request(scope={"type": "http"})
            request._url = URL(url="/chat/completions")
            async def return_body():
                # auth reads the request body to find the requested model
                return b'{"model": "gemini-pro-vision"}'
            request.body = return_body
            # use generated key to auth in - disallowed model, must raise
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            pytest.fail(f"This should have failed!. IT's an invalid model")
        asyncio.run(test())
    except Exception as e:
        # Guard against exceptions lacking `.message`, which would otherwise
        # surface as an AttributeError and mask the true failure.
        error_detail = e.message if hasattr(e, "message") else str(e)
        assert (
            error_detail
            == "Authentication Error, API Key not allowed to access model. This token can only access models=['mistral']. Tried to access gemini-pro-vision"
        )
def test_call_with_valid_model(prisma_client):
    """Scenario 4: a key scoped to models=["mistral"] successfully auths a "mistral" call."""
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            new_user_request = NewUserRequest(models=["mistral"])
            key_response = await new_user(new_user_request)
            print(key_response)
            generated_key = key_response.key
            bearer_token = "Bearer " + generated_key
            request = Request(scope={"type": "http"})
            request._url = URL(url="/chat/completions")
            async def return_body():
                return b'{"model": "mistral"}'
            request.body = return_body
            # auth with the generated key; model is on the allow-list, must not raise
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
        asyncio.run(test())
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")
def test_call_with_user_over_budget(prisma_client):
    """
    Scenario 5: a user who exceeds their max_budget must fail auth.

    Creates a user with a tiny budget, records spend through the proxy's
    cost-tracking callback, then expects the next auth attempt to raise an
    "ExceededBudget" authentication error.
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            request = NewUserRequest(max_budget=0.00001)
            key = await new_user(request)
            print(key)
            generated_key = key.key
            user_id = key.user_id
            bearer_token = "Bearer " + generated_key
            request = Request(scope={"type": "http"})
            request._url = URL(url="/chat/completions")
            # use generated key to auth in - succeeds while under budget
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
            # update spend using track_cost callback, make 2nd request, it should fail
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )
            from litellm import ModelResponse, Choices, Message, Usage
            resp = ModelResponse(
                id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
                choices=[
                    Choices(
                        finish_reason=None,
                        index=0,
                        message=Message(
                            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                            role="assistant",
                        ),
                    )
                ],
                model="gpt-35-turbo", # azure always has model written like this
                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
            )
            await track_cost_callback(
                kwargs={
                    "stream": False,
                    "litellm_params": {
                        "metadata": {
                            "user_api_key": generated_key,
                            "user_api_key_user_id": user_id,
                        }
                    },
                    "response_cost": 0.00002,
                },
                completion_response=resp,
                start_time=datetime.now(),
                end_time=datetime.now(),
            )
            # spend writes are asynchronous; give them time to land before re-authing
            await asyncio.sleep(5)
            # use generated key to auth in - now over budget, must raise
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
            pytest.fail(f"This should have failed!. They key crossed it's budget")
        asyncio.run(test())
    except Exception as e:
        # Consistent with test_call_with_proxy_over_budget: don't assume the
        # exception exposes `.message` - fall back to the formatted traceback so
        # unexpected failures stay diagnosable instead of dying on AttributeError.
        if hasattr(e, "message"):
            error_detail = e.message
        else:
            error_detail = traceback.format_exc()
        assert "Authentication Error, ExceededBudget:" in error_detail
        print(vars(e))
def test_call_with_end_user_over_budget(prisma_client):
    """
    Verify that an end-user named in the /chat/completions request body is
    spend-tracked, and that auth fails once that user crosses their budget.

    This enforcement only applies when `litellm.max_user_budget` is set.
    """
    # Test if a user passed to /chat/completions is tracked & fails when they cross their budget
    # we only check this when litellm.max_user_budget is set
    import random
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    setattr(litellm, "max_user_budget", 0.00001)
    try:
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            request = GenerateKeyRequest() # create a key with no budget
            key = await new_user(request)
            print(key)
            generated_key = key.key
            bearer_token = "Bearer " + generated_key
            # random suffix avoids colliding with end-users from earlier runs
            user = f"ishaan {random.randint(0, 10000)}"
            request = Request(scope={"type": "http"})
            request._url = URL(url="/chat/completions")
            async def return_body():
                # auth reads the body to discover the end-user being billed
                return_string = f'{{"model": "gemini-pro-vision", "user": "{user}"}}'
                # return string as bytes
                return return_string.encode()
            request.body = return_body
            # update spend using track_cost callback, make 2nd request, it should fail
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )
            from litellm import ModelResponse, Choices, Message, Usage
            resp = ModelResponse(
                id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
                choices=[
                    Choices(
                        finish_reason=None,
                        index=0,
                        message=Message(
                            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                            role="assistant",
                        ),
                    )
                ],
                model="gpt-35-turbo", # azure always has model written like this
                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
            )
            await track_cost_callback(
                kwargs={
                    "stream": False,
                    "litellm_params": {
                        "metadata": {
                            "user_api_key": generated_key,
                            "user_api_key_user_id": user,
                        },
                        # the end-user comes from the original proxy request payload
                        "proxy_server_request": {
                            "user": user,
                        },
                    },
                    "response_cost": 10,
                },
                completion_response=resp,
                start_time=datetime.now(),
                end_time=datetime.now(),
            )
            # spend writes are asynchronous; wait for the DB update to land
            await asyncio.sleep(5)
            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
            pytest.fail(f"This should have failed!. They key crossed it's budget")
        asyncio.run(test())
    except Exception as e:
        # NOTE(review): assumes the raised exception exposes `.message`
        # (litellm's ProxyException does) - confirm for other exception types
        error_detail = e.message
        assert "Authentication Error, ExceededBudget:" in error_detail
        print(vars(e))
def test_call_with_proxy_over_budget(prisma_client):
    """
    Scenario 5.1: once the *global proxy budget* user exceeds its max_budget,
    auth must fail for any key with an ExceededBudget authentication error.
    """
    # 5.1 Make a call with a proxy over budget, expect to fail
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    # unique per-run budget name so spend from earlier runs doesn't interfere
    litellm_proxy_budget_name = f"litellm-proxy-budget-{time.time()}"
    setattr(
        litellm.proxy.proxy_server,
        "litellm_proxy_budget_name",
        litellm_proxy_budget_name,
    )
    try:
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            ## CREATE PROXY + USER BUDGET ##
            # the proxy-wide budget is modelled as a user with this reserved id
            request = NewUserRequest(
                max_budget=0.00001, user_id=litellm_proxy_budget_name
            )
            await new_user(request)
            request = NewUserRequest()
            key = await new_user(request)
            print(key)
            generated_key = key.key
            user_id = key.user_id
            bearer_token = "Bearer " + generated_key
            request = Request(scope={"type": "http"})
            request._url = URL(url="/chat/completions")
            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
            # update spend using track_cost callback, make 2nd request, it should fail
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )
            from litellm import ModelResponse, Choices, Message, Usage
            resp = ModelResponse(
                id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
                choices=[
                    Choices(
                        finish_reason=None,
                        index=0,
                        message=Message(
                            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                            role="assistant",
                        ),
                    )
                ],
                model="gpt-35-turbo", # azure always has model written like this
                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
            )
            await track_cost_callback(
                kwargs={
                    "stream": False,
                    "litellm_params": {
                        "metadata": {
                            "user_api_key": generated_key,
                            "user_api_key_user_id": user_id,
                        }
                    },
                    "response_cost": 0.00002,
                },
                completion_response=resp,
                start_time=datetime.now(),
                end_time=datetime.now(),
            )
            # spend writes are asynchronous; wait before re-authing
            await asyncio.sleep(5)
            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
            pytest.fail(f"This should have failed!. They key crossed it's budget")
        asyncio.run(test())
    except Exception as e:
        # ProxyException exposes `.message`; anything else falls back to the traceback
        if hasattr(e, "message"):
            error_detail = e.message
        else:
            error_detail = traceback.format_exc()
        assert "Authentication Error, ExceededBudget:" in error_detail
        print(vars(e))
def test_call_with_user_over_budget_stream(prisma_client):
    """
    Scenario 6: streaming variant of the user-over-budget test. Spend is
    reported with stream=True plus `complete_streaming_response`; the next
    auth attempt must fail with an ExceededBudget authentication error.
    """
    # 6. Make a call with a key over budget, expect to fail
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    from litellm._logging import verbose_proxy_logger
    import logging
    litellm.set_verbose = True
    verbose_proxy_logger.setLevel(logging.DEBUG)
    try:
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            request = NewUserRequest(max_budget=0.00001)
            key = await new_user(request)
            print(key)
            generated_key = key.key
            user_id = key.user_id
            bearer_token = "Bearer " + generated_key
            request = Request(scope={"type": "http"})
            request._url = URL(url="/chat/completions")
            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
            # update spend using track_cost callback, make 2nd request, it should fail
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )
            from litellm import ModelResponse, Choices, Message, Usage
            resp = ModelResponse(
                id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
                choices=[
                    Choices(
                        finish_reason=None,
                        index=0,
                        message=Message(
                            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                            role="assistant",
                        ),
                    )
                ],
                model="gpt-35-turbo", # azure always has model written like this
                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
            )
            await track_cost_callback(
                kwargs={
                    # streaming call: the assembled response is supplied separately
                    "stream": True,
                    "complete_streaming_response": resp,
                    "litellm_params": {
                        "metadata": {
                            "user_api_key": generated_key,
                            "user_api_key_user_id": user_id,
                        }
                    },
                    "response_cost": 0.00002,
                },
                completion_response=ModelResponse(),
                start_time=datetime.now(),
                end_time=datetime.now(),
            )
            # spend writes are asynchronous; wait before re-authing
            await asyncio.sleep(5)
            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
            pytest.fail(f"This should have failed!. They key crossed it's budget")
        asyncio.run(test())
    except Exception as e:
        # NOTE(review): assumes the raised exception exposes `.message`
        error_detail = e.message
        assert "Authentication Error, ExceededBudget:" in error_detail
        print(vars(e))
def test_call_with_proxy_over_budget_stream(prisma_client):
    """
    Scenario 6.1: streaming variant of the global-proxy-budget test. Once the
    proxy budget user is over budget, auth must fail for any key.
    """
    # 6.1 Make a call with a global proxy over budget, expect to fail
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    # unique per-run budget name so spend from earlier runs doesn't interfere
    litellm_proxy_budget_name = f"litellm-proxy-budget-{time.time()}"
    setattr(
        litellm.proxy.proxy_server,
        "litellm_proxy_budget_name",
        litellm_proxy_budget_name,
    )
    from litellm._logging import verbose_proxy_logger
    import logging
    litellm.set_verbose = True
    verbose_proxy_logger.setLevel(logging.DEBUG)
    try:
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            ## CREATE PROXY + USER BUDGET ##
            # the proxy-wide budget is modelled as a user with this reserved id
            request = NewUserRequest(
                max_budget=0.00001, user_id=litellm_proxy_budget_name
            )
            await new_user(request)
            request = NewUserRequest()
            key = await new_user(request)
            print(key)
            generated_key = key.key
            user_id = key.user_id
            bearer_token = "Bearer " + generated_key
            request = Request(scope={"type": "http"})
            request._url = URL(url="/chat/completions")
            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
            # update spend using track_cost callback, make 2nd request, it should fail
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )
            from litellm import ModelResponse, Choices, Message, Usage
            resp = ModelResponse(
                id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
                choices=[
                    Choices(
                        finish_reason=None,
                        index=0,
                        message=Message(
                            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                            role="assistant",
                        ),
                    )
                ],
                model="gpt-35-turbo", # azure always has model written like this
                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
            )
            await track_cost_callback(
                kwargs={
                    # streaming call: the assembled response is supplied separately
                    "stream": True,
                    "complete_streaming_response": resp,
                    "litellm_params": {
                        "metadata": {
                            "user_api_key": generated_key,
                            "user_api_key_user_id": user_id,
                        }
                    },
                    "response_cost": 0.00002,
                },
                completion_response=ModelResponse(),
                start_time=datetime.now(),
                end_time=datetime.now(),
            )
            # spend writes are asynchronous; wait before re-authing
            await asyncio.sleep(5)
            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
            pytest.fail(f"This should have failed!. They key crossed it's budget")
        asyncio.run(test())
    except Exception as e:
        # NOTE(review): assumes the raised exception exposes `.message`
        error_detail = e.message
        assert "Authentication Error, ExceededBudget:" in error_detail
        print(vars(e))
def test_generate_and_call_with_valid_key_never_expires(prisma_client):
    """Scenario 7: a key created with duration=None never expires and auths successfully."""
    print("prisma client=", prisma_client)
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            key_response = await new_user(NewUserRequest(duration=None))
            print(key_response)
            bearer_token = "Bearer " + key_response.key
            auth_request = Request(scope={"type": "http"})
            auth_request._url = URL(url="/chat/completions")
            # a never-expiring key must authenticate without raising
            result = await user_api_key_auth(request=auth_request, api_key=bearer_token)
            print("result from user auth with new key", result)
        asyncio.run(test())
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")
def test_generate_and_call_with_expired_key(prisma_client):
    """
    Scenario 8: a key created with duration="0s" is expired immediately,
    so auth with it must raise an "Authentication Error".
    """
    print("prisma client=", prisma_client)
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            request = NewUserRequest(duration="0s")
            key = await new_user(request)
            print(key)
            generated_key = key.key
            bearer_token = "Bearer " + generated_key
            request = Request(scope={"type": "http"})
            request._url = URL(url="/chat/completions")
            # use generated key to auth in - expired, must raise
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
            pytest.fail(f"This should have failed!. IT's an expired key")
        asyncio.run(test())
    except Exception as e:
        print("Got Exception", e)
        # Don't assume `.message` exists on the exception - a bare attribute
        # access would raise AttributeError and hide the real failure.
        error_detail = e.message if hasattr(e, "message") else str(e)
        print(error_detail)
        assert "Authentication Error" in error_detail
def test_delete_key(prisma_client):
    """Scenario 9: generate a key, delete it, check the deletion response and cache eviction."""
    print("prisma client=", prisma_client)
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            from litellm.proxy.proxy_server import user_api_key_cache
            key_response = await new_user(NewUserRequest())
            print(key_response)
            generated_key = key_response.key
            bearer_token = "Bearer " + generated_key
            # delete the freshly-created key
            result_delete_key = await delete_key_fn(
                data=DeleteKeyRequest(keys=[generated_key])
            )
            print("result from delete key", result_delete_key)
            assert result_delete_key == {"deleted_keys": [generated_key]}
            # neither the raw key nor its hash may remain in the in-memory cache
            cache_dict = user_api_key_cache.in_memory_cache.cache_dict
            assert generated_key not in cache_dict
            assert hash_token(generated_key) not in cache_dict
        asyncio.run(test())
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")
def test_delete_key_auth(prisma_client):
    """
    Scenario 10: generate a key, delete it, then try to auth with it.
    Auth must fail with an Authentication Error, and the cache must be evicted.
    """
    # 10. Generate a Key, delete it, use it to make a call -> expect fail
    print("prisma client=", prisma_client)
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            from litellm.proxy.proxy_server import user_api_key_cache
            request = NewUserRequest()
            key = await new_user(request)
            print(key)
            generated_key = key.key
            bearer_token = "Bearer " + generated_key
            delete_key_request = DeleteKeyRequest(keys=[generated_key])
            # delete the key
            result_delete_key = await delete_key_fn(data=delete_key_request)
            print("result from delete key", result_delete_key)
            assert result_delete_key == {"deleted_keys": [generated_key]}
            request = Request(scope={"type": "http"}, receive=None)
            request._url = URL(url="/chat/completions")
            # deletion must also evict both the raw-key and hashed-key cache entries
            assert generated_key not in user_api_key_cache.in_memory_cache.cache_dict
            assert (
                hash_token(generated_key)
                not in user_api_key_cache.in_memory_cache.cache_dict
            )
            # use generated key to auth in - must now be rejected
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("got result", result)
            pytest.fail(f"This should have failed!. IT's an invalid key")
        asyncio.run(test())
    except Exception as e:
        print("Got Exception", e)
        # NOTE(review): assumes a `.message` attribute on the exception
        # (litellm's ProxyException has one) - a plain Exception would
        # raise AttributeError here; confirm this is the intended contract
        print(e.message)
        assert "Authentication Error" in e.message
        pass
def test_generate_and_call_key_info(prisma_client):
    """Scenario 10: generate a key with metadata, verify /key/info returns it, then clean up."""
    print("prisma client=", prisma_client)
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            key_metadata = {"team": "litellm-team3", "project": "litellm-project3"}
            key_response = await new_user(NewUserRequest(metadata=key_metadata))
            print(key_response)
            generated_key = key_response.key
            # look the key up via /key/info
            result = await info_key_fn(key=generated_key)
            print("result from info_key_fn", result)
            assert result["key"] == generated_key
            print("\n info for key=", result["info"])
            # defaults and metadata must round-trip unchanged
            assert result["info"]["max_parallel_requests"] == None
            assert result["info"]["metadata"] == {
                "team": "litellm-team3",
                "project": "litellm-project3",
            }
            # cleanup - remove the key created above
            await delete_key_fn(data=DeleteKeyRequest(keys=[generated_key]))
        asyncio.run(test())
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")
def test_generate_and_update_key(prisma_client):
    """
    Scenario 11: generate a key, read it via /key/info, update it via
    /key/update, then re-read and verify:
      - the updated field (models) changed
      - untouched fields (metadata, max_parallel_requests, team_id) did not
    """
    print("prisma client=", prisma_client)
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            request = NewUserRequest(
                metadata={"team": "litellm-team3", "project": "litellm-project3"},
                team_id="litellm-core-infra@gmail.com",
            )
            key = await new_user(request)
            print(key)
            generated_key = key.key
            # use generated key to auth in
            result = await info_key_fn(key=generated_key)
            print("result from info_key_fn", result)
            assert result["key"] == generated_key
            print("\n info for key=", result["info"])
            assert result["info"]["max_parallel_requests"] == None
            assert result["info"]["metadata"] == {
                "team": "litellm-team3",
                "project": "litellm-project3",
            }
            assert result["info"]["team_id"] == "litellm-core-infra@gmail.com"
            request = Request(scope={"type": "http"})
            request._url = URL(url="/update/key")
            # update the key
            # BUGFIX: pass the Request *instance* built above - the original
            # passed the `Request` class itself (request=Request).
            await update_key_fn(
                request=request,
                data=UpdateKeyRequest(
                    key=generated_key,
                    models=["ada", "babbage", "curie", "davinci"],
                ),
            )
            # get info on key after update
            result = await info_key_fn(key=generated_key)
            print("result from info_key_fn", result)
            assert result["key"] == generated_key
            print("\n info for key=", result["info"])
            # untouched fields must be preserved by the update
            assert result["info"]["max_parallel_requests"] == None
            assert result["info"]["metadata"] == {
                "team": "litellm-team3",
                "project": "litellm-project3",
            }
            assert result["info"]["models"] == ["ada", "babbage", "curie", "davinci"]
            # cleanup - delete key
            delete_key_request = DeleteKeyRequest(keys=[generated_key])
            # delete the key
            await delete_key_fn(data=delete_key_request)
        asyncio.run(test())
    except Exception as e:
        print("Got Exception", e)
        # don't crash on exceptions without `.message` before reporting the failure
        print(getattr(e, "message", str(e)))
        pytest.fail(f"An exception occurred - {str(e)}")
def test_key_generate_with_custom_auth(prisma_client):
    """
    Verify the `user_custom_key_generate` hook: key generation is rejected
    unless the request carries team_id="litellm-core-infra@gmail.com", and
    succeeds when it does.
    """
    # custom - generate key function
    async def custom_generate_key_fn(data: GenerateKeyRequest) -> dict:
        """
        Asynchronous function for generating a key based on the input data.
        Args:
            data (GenerateKeyRequest): The input data for key generation.
        Returns:
            dict: A dictionary containing the decision and an optional message.
            {
                "decision": False,
                "message": "This violates LiteLLM Proxy Rules. No team id provided.",
            }
        """
        # decide if a key should be generated or not
        print("using custom auth function!")
        data_json = data.json() # type: ignore
        # Unpacking variables
        team_id = data_json.get("team_id")
        duration = data_json.get("duration")
        models = data_json.get("models")
        aliases = data_json.get("aliases")
        config = data_json.get("config")
        spend = data_json.get("spend")
        user_id = data_json.get("user_id")
        max_parallel_requests = data_json.get("max_parallel_requests")
        metadata = data_json.get("metadata")
        tpm_limit = data_json.get("tpm_limit")
        rpm_limit = data_json.get("rpm_limit")
        if team_id is not None and team_id == "litellm-core-infra@gmail.com":
            # only team_id="litellm-core-infra@gmail.com" can make keys
            return {
                "decision": True,
            }
        else:
            print("Failed custom auth")
            return {
                "decision": False,
                "message": "This violates LiteLLM Proxy Rules. No team id provided.",
            }
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    # install the custom hook so generate_key_fn consults it
    setattr(
        litellm.proxy.proxy_server, "user_custom_key_generate", custom_generate_key_fn
    )
    try:
        async def test():
            try:
                await litellm.proxy.proxy_server.prisma_client.connect()
                # no team_id -> the custom hook must reject this request
                request = GenerateKeyRequest()
                key = await generate_key_fn(request)
                pytest.fail(f"Expected an exception. Got {key}")
            except Exception as e:
                # this should fail
                print("Got Exception", e)
                print(e.message)
                print("First request failed!. This is expected")
                assert (
                    "This violates LiteLLM Proxy Rules. No team id provided."
                    in e.message
                )
            # second attempt carries the allowed team id - must succeed
            request_2 = GenerateKeyRequest(
                team_id="litellm-core-infra@gmail.com",
            )
            key = await generate_key_fn(request_2)
            print(key)
            generated_key = key.key
        asyncio.run(test())
    except Exception as e:
        print("Got Exception", e)
        print(e.message)
        pytest.fail(f"An exception occurred - {str(e)}")
def test_call_with_key_over_budget(prisma_client):
    """
    Scenario 12: a key that crosses its own max_budget must fail auth with an
    ExceededTokenBudget authentication error. Also verifies the spend-log row
    written for the tracked request (request_id, spend, model, cache_key).
    """
    # 12. Make a call with a key over budget, expect to fail
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            request = GenerateKeyRequest(max_budget=0.00001)
            key = await generate_key_fn(request)
            print(key)
            generated_key = key.key
            user_id = key.user_id
            bearer_token = "Bearer " + generated_key
            request = Request(scope={"type": "http"})
            request._url = URL(url="/chat/completions")
            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
            # update spend using track_cost callback, make 2nd request, it should fail
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )
            from litellm import ModelResponse, Choices, Message, Usage
            from litellm.caching import Cache
            # presumably enables caching so the spend log records the cache_key
            # asserted below - TODO confirm
            litellm.cache = Cache()
            import time
            # unique request id so the spend-log lookup below matches exactly one row
            request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{time.time()}"
            resp = ModelResponse(
                id=request_id,
                choices=[
                    Choices(
                        finish_reason=None,
                        index=0,
                        message=Message(
                            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                            role="assistant",
                        ),
                    )
                ],
                model="gpt-35-turbo", # azure always has model written like this
                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
            )
            await track_cost_callback(
                kwargs={
                    "model": "chatgpt-v-2",
                    "stream": False,
                    "litellm_params": {
                        "metadata": {
                            # this test passes the *hashed* key in metadata
                            "user_api_key": hash_token(generated_key),
                            "user_api_key_user_id": user_id,
                        }
                    },
                    "response_cost": 0.00002,
                },
                completion_response=resp,
                start_time=datetime.now(),
                end_time=datetime.now(),
            )
            # spend-log writes are asynchronous; wait for them to land
            await asyncio.sleep(10)
            # test spend_log was written and we can read it
            spend_logs = await view_spend_logs(request_id=request_id)
            print("read spend logs", spend_logs)
            assert len(spend_logs) == 1
            spend_log = spend_logs[0]
            assert spend_log.request_id == request_id
            assert spend_log.spend == float("2e-05")
            assert spend_log.model == "chatgpt-v-2"
            assert (
                spend_log.cache_key
                == "a61ae14fe4a8b8014a61e6ae01a100c8bc6770ac37c293242afed954bc69207d"
            )
            # use generated key to auth in - now over budget, must raise
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
            pytest.fail(f"This should have failed!. They key crossed it's budget")
        asyncio.run(test())
    except Exception as e:
        # print(f"Error - {str(e)}")
        traceback.print_exc()
        # NOTE(review): assumes the raised exception exposes `.message`
        error_detail = e.message
        assert "Authentication Error, ExceededTokenBudget:" in error_detail
        print(vars(e))
@pytest.mark.asyncio()
async def test_call_with_key_never_over_budget(prisma_client):
    """A key created with max_budget=None must keep authenticating regardless of spend."""
    # Make a call with a key with budget=None, it should never fail
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:
        await litellm.proxy.proxy_server.prisma_client.connect()
        request = GenerateKeyRequest(max_budget=None)
        key = await generate_key_fn(request)
        print(key)
        generated_key = key.key
        user_id = key.user_id
        bearer_token = "Bearer " + generated_key
        request = Request(scope={"type": "http"})
        request._url = URL(url="/chat/completions")
        # use generated key to auth in
        result = await user_api_key_auth(request=request, api_key=bearer_token)
        print("result from user auth with new key", result)
        # update spend using track_cost callback, make a 2nd request - it must still pass
        from litellm.proxy.proxy_server import (
            _PROXY_track_cost_callback as track_cost_callback,
        )
        from litellm import ModelResponse, Choices, Message, Usage
        import time
        request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{time.time()}"
        resp = ModelResponse(
            id=request_id,
            choices=[
                Choices(
                    finish_reason=None,
                    index=0,
                    message=Message(
                        content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                        role="assistant",
                    ),
                )
            ],
            model="gpt-35-turbo", # azure always has model written like this
            # deliberately huge token counts - spend must still not block auth
            usage=Usage(
                prompt_tokens=210000, completion_tokens=200000, total_tokens=41000
            ),
        )
        await track_cost_callback(
            kwargs={
                "model": "chatgpt-v-2",
                "stream": False,
                "litellm_params": {
                    "metadata": {
                        "user_api_key": hash_token(generated_key),
                        "user_api_key_user_id": user_id,
                    }
                },
                "response_cost": 200000,
            },
            completion_response=resp,
            start_time=datetime.now(),
            end_time=datetime.now(),
        )
        # spend writes are asynchronous; wait before re-authing
        await asyncio.sleep(5)
        # use generated key to auth in - must still succeed with unlimited budget
        result = await user_api_key_auth(request=request, api_key=bearer_token)
        print("result from user auth with new key", result)
    except Exception as e:
        pytest.fail(f"This should have not failed!. They key uses max_budget=None. {e}")
@pytest.mark.asyncio()
async def test_call_with_key_over_budget_stream(prisma_client):
    """
    14. Make a streaming chat/completions call with a key over budget - after the
    streamed spend is tracked, the next auth call with that key must fail with
    an ExceededTokenBudget error.
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    from litellm._logging import verbose_proxy_logger
    import logging

    litellm.set_verbose = True
    verbose_proxy_logger.setLevel(logging.DEBUG)
    try:
        await litellm.proxy.proxy_server.prisma_client.connect()
        # budget is deliberately tiny so the tracked cost below exceeds it
        request = GenerateKeyRequest(max_budget=0.00001)
        key = await generate_key_fn(request)
        print(key)

        generated_key = key.key
        user_id = key.user_id
        bearer_token = "Bearer " + generated_key

        request = Request(scope={"type": "http"})
        request._url = URL(url="/chat/completions")

        # use generated key to auth in
        result = await user_api_key_auth(request=request, api_key=bearer_token)
        print("result from user auth with new key", result)

        # update spend using track_cost callback, make 2nd request, it should fail
        from litellm.proxy.proxy_server import (
            _PROXY_track_cost_callback as track_cost_callback,
        )
        from litellm import ModelResponse, Choices, Message, Usage
        import time

        request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{time.time()}"

        resp = ModelResponse(
            id=request_id,
            choices=[
                Choices(
                    finish_reason=None,
                    index=0,
                    message=Message(
                        content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                        role="assistant",
                    ),
                )
            ],
            model="gpt-35-turbo",  # azure always has model written like this
            usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
        )
        await track_cost_callback(
            kwargs={
                "call_type": "acompletion",
                "model": "sagemaker-chatgpt-v-2",
                "stream": True,
                "complete_streaming_response": resp,
                "litellm_params": {
                    "metadata": {
                        "user_api_key": hash_token(generated_key),
                        "user_api_key_user_id": user_id,
                    }
                },
                # 0.00005 > max_budget of 0.00001 -> key is now over budget
                "response_cost": 0.00005,
            },
            completion_response=resp,
            start_time=datetime.now(),
            end_time=datetime.now(),
        )
        # give the spend-update background task time to write to the DB
        await asyncio.sleep(5)
        # use generated key to auth in - expected to be rejected now
        result = await user_api_key_auth(request=request, api_key=bearer_token)
        print("result from user auth with new key", result)
        # fix: typos in failure message ("They key crossed it's budget")
        pytest.fail(f"This should have failed!. The key crossed its budget")
    except Exception as e:
        print("Got Exception", e)
        error_detail = e.message
        assert "Authentication Error, ExceededTokenBudget:" in error_detail
        print(vars(e))
@pytest.mark.asyncio()
async def test_view_spend_per_user(prisma_client):
    """Fetch spend grouped per user via `spend_user_fn` and verify a non-empty result."""
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    await litellm.proxy.proxy_server.prisma_client.connect()
    try:
        users_by_spend = await spend_user_fn(user_id=None)

        # expect a non-empty list of user spend records
        assert type(users_by_spend) == list
        assert len(users_by_spend) > 0

        top_user = users_by_spend[0]
        print("\nfirst_user=", top_user)
        # at least some spend must have been recorded for the top user
        assert top_user.spend > 0
    except Exception as e:
        print("Got Exception", e)
        pytest.fail(f"Got exception {e}")
@pytest.mark.asyncio()
async def test_view_spend_per_key(prisma_client):
    """Fetch spend grouped per key via `spend_key_fn` and verify a non-empty result."""
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    await litellm.proxy.proxy_server.prisma_client.connect()
    try:
        keys_by_spend = await spend_key_fn()

        # expect a non-empty list of key spend records
        assert type(keys_by_spend) == list
        assert len(keys_by_spend) > 0

        top_key = keys_by_spend[0]
        print("\nfirst_key=", top_key)
        # at least some spend must have been recorded for the top key
        assert top_key.spend > 0
    except Exception as e:
        print("Got Exception", e)
        pytest.fail(f"Got exception {e}")
@pytest.mark.asyncio()
async def test_key_name_null(prisma_client):
    """
    With `allow_user_auth` disabled:
    - create key
    - get key info
    - assert key_name is null
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    setattr(litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": False})
    await litellm.proxy.proxy_server.prisma_client.connect()
    try:
        new_key = await generate_key_fn(GenerateKeyRequest())
        result = await info_key_fn(key=new_key.key)
        print("result from info_key_fn", result)
        # key_name is only persisted when allow_user_auth is on
        assert result["info"]["key_name"] is None
    except Exception as e:
        print("Got Exception", e)
        pytest.fail(f"Got exception {e}")
@pytest.mark.asyncio()
async def test_key_name_set(prisma_client):
    """
    With `allow_user_auth` enabled:
    - create key
    - get key info
    - assert key_name is not null
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    setattr(litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": True})
    await litellm.proxy.proxy_server.prisma_client.connect()
    try:
        new_key = await generate_key_fn(GenerateKeyRequest())
        result = await info_key_fn(key=new_key.key)
        print("result from info_key_fn", result)
        # key_name should be a truncated display name like sk-...<last-4-digits>
        assert isinstance(result["info"]["key_name"], str)
    except Exception as e:
        print("Got Exception", e)
        pytest.fail(f"Got exception {e}")
@pytest.mark.asyncio()
async def test_default_key_params(prisma_client):
    """
    - set litellm.default_key_generate_params
    - create key with no explicit params
    - get key info
    - assert the configured default max_budget was applied
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    setattr(litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": True})
    litellm.default_key_generate_params = {"max_budget": 0.000122}
    await litellm.proxy.proxy_server.prisma_client.connect()
    try:
        request = GenerateKeyRequest()
        key = await generate_key_fn(request)
        generated_key = key.key
        result = await info_key_fn(key=generated_key)
        print("result from info_key_fn", result)
        assert result["info"]["max_budget"] == 0.000122
    except Exception as e:
        print("Got Exception", e)
        pytest.fail(f"Got exception {e}")
    finally:
        # reset the module-level default so it doesn't leak into other tests
        litellm.default_key_generate_params = None
@pytest.mark.asyncio()
async def test_upperbound_key_params(prisma_client):
    """
    - set litellm.upperbound_key_generate_params
    - request a key with max_budget / budget_duration above the bounds
    - get key info
    - assert the key was clamped to the configured upper bounds
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    litellm.upperbound_key_generate_params = {
        "max_budget": 0.001,
        "budget_duration": "1m",
    }
    await litellm.proxy.proxy_server.prisma_client.connect()
    try:
        # request values deliberately above the upper bounds
        request = GenerateKeyRequest(
            max_budget=200000,
            budget_duration="30d",
        )
        key = await generate_key_fn(request)
        generated_key = key.key
        result = await info_key_fn(key=generated_key)
        key_info = result["info"]
        # assert it used the upper bound for max_budget, and budget_duration
        assert key_info["max_budget"] == 0.001
        assert key_info["budget_duration"] == "1m"
        print(result)
    except Exception as e:
        print("Got Exception", e)
        pytest.fail(f"Got exception {e}")
    finally:
        # reset the module-level bounds so they don't leak into other tests
        litellm.upperbound_key_generate_params = None
def test_get_bearer_token():
    """
    Exercise `_get_bearer_token` parsing: only a key with an exact
    `Bearer ` prefix yields a token; everything else returns "".
    """
    from litellm.proxy.proxy_server import _get_bearer_token

    # Test valid Bearer token
    api_key = "Bearer valid_token"
    result = _get_bearer_token(api_key)
    assert result == "valid_token", f"Expected 'valid_token', got '{result}'"

    # Test empty API key
    api_key = ""
    result = _get_bearer_token(api_key)
    assert result == "", f"Expected '', got '{result}'"

    # Test API key without Bearer prefix
    api_key = "invalid_token"
    result = _get_bearer_token(api_key)
    assert result == "", f"Expected '', got '{result}'"

    # Test API key with Bearer prefix in lowercase - prefix match is case-sensitive
    api_key = "bearer valid_token"
    result = _get_bearer_token(api_key)
    assert result == "", f"Expected '', got '{result}'"

    # Test API key with Bearer prefix and extra spaces
    api_key = "   Bearer valid_token   "
    result = _get_bearer_token(api_key)
    assert result == "", f"Expected '', got '{result}'"

    # Test API key with Bearer prefix and an sk- style token
    api_key = "Bearer sk-1234"
    result = _get_bearer_token(api_key)
    # fix: message previously claimed the expected value was 'valid_token'
    assert result == "sk-1234", f"Expected 'sk-1234', got '{result}'"
@pytest.mark.asyncio
async def test_user_api_key_auth(prisma_client):
    """Invalid API keys (missing, malformed, empty) must raise ProxyException
    with the exact expected authentication error message."""
    from litellm.proxy.proxy_server import ProxyException

    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    setattr(litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": True})
    await litellm.proxy.proxy_server.prisma_client.connect()

    request = Request(scope={"type": "http"})
    request._url = URL(url="/chat/completions")

    # (api_key, expected ProxyException message) pairs
    invalid_key_cases = [
        # No API Key passed in
        (None, "Authentication Error, No api key passed in."),
        # Malformed API Key (missing 'Bearer ' prefix)
        (
            "my_token",
            "Authentication Error, Malformed API Key passed in. Ensure Key has `Bearer ` prefix. Passed in: my_token",
        ),
        # User passes empty string API Key
        (
            "",
            "Authentication Error, Malformed API Key passed in. Ensure Key has `Bearer ` prefix. Passed in: ",
        ),
    ]
    for bad_key, expected_message in invalid_key_cases:
        try:
            await user_api_key_auth(request, api_key=bad_key)
            pytest.fail(f"This should have failed!. IT's an invalid key")
        except ProxyException as exc:
            print(exc.message)
            assert exc.message == expected_message
@pytest.mark.asyncio
async def test_user_api_key_auth_without_master_key(prisma_client):
    # if master key is not set, expect all calls to go through
    try:
        from litellm.proxy.proxy_server import ProxyException

        setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
        setattr(litellm.proxy.proxy_server, "master_key", None)
        setattr(
            litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": True}
        )
        await litellm.proxy.proxy_server.prisma_client.connect()

        request = Request(scope={"type": "http"})
        request._url = URL(url="/chat/completions")

        # every key shape - including no key at all - must be accepted
        for candidate_key in [None, "my_token", "", "Bearer " + "1234"]:
            await user_api_key_auth(request, api_key=candidate_key)
    except Exception as e:
        print("Got Exception", e)
        pytest.fail(f"Got exception {e}")
@pytest.mark.asyncio
async def test_key_with_no_permissions(prisma_client):
"""
- create key
- get key info
- assert key_name is null
"""
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
setattr(litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": False})
await litellm.proxy.proxy_server.prisma_client.connect()
try:
response = await generate_key_helper_fn(
**{"duration": "1hr", "key_max_budget": 0, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": "ishaan", "team_id": "litellm-dashboard"} # type: ignore
)
print(response)
key = response["token"]
# make a /chat/completions call -> it should fail
request = Request(scope={"type": "http"})
request._url = URL(url="/chat/completions")
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key="Bearer " + key)
print("result from user auth with new key", result)
pytest.fail(f"This should have failed!. IT's an invalid key")
except Exception as e:
print("Got Exception", e)
print(e.message)