#### What this tests ####
# This tests setting rules before / after making llm api calls
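#
# As exercised in this file: a pre-call rule receives the user input string and
# returns a bool (False blocks the request before the API call), while a
# post-call rule receives the model output string and returns a dict whose
# "decision" accepts or rejects the response and whose "message" (asserted on
# in the tests below) surfaces on the raised exception.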

import asyncio
import os
import sys
import time
import traceback

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import litellm
from litellm import acompletion, completion


def my_pre_call_rule(input: str):
    print(f"input: {input}")
    print(f"INSIDE MY PRE CALL RULE, len(input) - {len(input)}")
    if len(input) > 10:
        return False
    return True
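

# Note: my_pre_call_rule above blocks any input longer than 10 characters, so
# both prompts sent in Test 1 should be rejected before an LLM API call is made.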


## Test 1: Pre-call rule
def test_pre_call_rule():
    try:
        litellm.pre_call_rules = [my_pre_call_rule]
        ### completion
        response = completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "say something inappropriate"}],
        )
        pytest.fail("Completion call should have failed.")
    except Exception:
        pass

    ### async completion
    async def test_async_response():
        user_message = "Hello, how are you?"
        messages = [{"content": user_message, "role": "user"}]
        try:
            response = await acompletion(model="gpt-3.5-turbo", messages=messages)
            pytest.fail("acompletion call should have failed.")
        except Exception:
            pass

    asyncio.run(test_async_response())
    litellm.pre_call_rules = []


def my_post_call_rule(input: str):
    input = input.lower()
    print(f"input: {input}")
    print(f"INSIDE MY POST CALL RULE, len(input) - {len(input)}")
    if len(input) < 200:
        return {
            "decision": False,
            "message": "This violates LiteLLM Proxy Rules. Response too short",
        }
    return {"decision": True}


def my_post_call_rule_2(input: str):
    input = input.lower()
    print(f"input: {input}")
    print(f"INSIDE MY POST CALL RULE, len(input) - {len(input)}")
    if len(input) < 200 and len(input) > 0:
        return {
            "decision": False,
            "message": "This violates LiteLLM Proxy Rules. Response too short",
        }
    return {"decision": True}
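

# Note: both post-call rules above reject a (non-empty) response shorter than
# 200 characters, and the tests below cap the response at max_tokens=2, so the
# "Response too short" message is expected on the raised exception.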


# test_pre_call_rule()


## Test 2: Post-call rule
# commented out of ci/cd since LLMs have variable output, which was causing our pipeline to fail erratically.
def test_post_call_rule():
    try:
        litellm.pre_call_rules = []
        litellm.post_call_rules = [my_post_call_rule]
        ### completion
        response = completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "say sorry"}],
            max_tokens=2,
        )
        pytest.fail("Completion call should have failed.")
    except Exception as e:
        print("Got exception", e)
        print(type(e))
        print(vars(e))
        assert e.message == "This violates LiteLLM Proxy Rules. Response too short"
    # print(f"MAKING ACOMPLETION CALL")
    # litellm.set_verbose = True
    ### async completion
    # async def test_async_response():
    #     messages = [{"role": "user", "content": "say sorry"}]
    #     try:
    #         response = await acompletion(model="gpt-3.5-turbo", messages=messages)
    #         pytest.fail("acompletion call should have failed.")
    #     except Exception as e:
    #         pass

    # asyncio.run(test_async_response())
    litellm.pre_call_rules = []
    litellm.post_call_rules = []


# test_post_call_rule()


def test_post_call_rule_streaming():
    try:
        litellm.pre_call_rules = []
        litellm.post_call_rules = [my_post_call_rule_2]
        ### completion
        response = completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "say sorry"}],
            max_tokens=2,
            stream=True,
        )
        for chunk in response:
            print(f"chunk: {chunk}")
        pytest.fail("Completion call should have failed.")
    except Exception as e:
        print("Got exception", e)
        print(type(e))
        print(vars(e))
        assert "This violates LiteLLM Proxy Rules. Response too short" in e.message


@pytest.mark.asyncio
async def test_post_call_processing_error_async_response():
    try:
        response = await acompletion(
            model="command-nightly",  # Just used as an example
            messages=[{"content": "Hello, how are you?", "role": "user"}],
            api_base="https://openai-proxy.berriai.repl.co",  # Just used as an example
            custom_llm_provider="openai",
        )
        pytest.fail("This call should have failed")
    except Exception:
        pass