fix(proxy_server.py): fix callback check order

Krrish Dholakia 2024-07-05 14:04:28 -07:00
parent 9c6080be1a
commit d528b66db0
3 changed files with 185 additions and 9 deletions
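In summary (as read from the diffs below): the ### CALL HOOKS ### block in chat_completion moves ahead of the logging-object initialization, ProxyLogging's failure path now constructs a logging object on demand via litellm.utils.function_setup when a request is rejected before one exists, and a new unit test asserts that such rejected requests still reach the failure callbacks.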

@@ -2753,6 +2753,11 @@ async def chat_completion(
         if isinstance(data["model"], str) and data["model"] in litellm.model_alias_map:
             data["model"] = litellm.model_alias_map[data["model"]]
 
+        ### CALL HOOKS ### - modify/reject incoming data before calling the model
+        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
+            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
+        )
+
         ## LOGGING OBJECT ## - initialize logging object for logging success/failure events for call
         ## IMPORTANT Note: - initialize this before running pre-call checks. Ensures we log rejected requests to langfuse.
         data["litellm_call_id"] = str(uuid.uuid4())
@@ -2765,11 +2770,6 @@ async def chat_completion(
         data["litellm_logging_obj"] = logging_obj
 
-        ### CALL HOOKS ### - modify/reject incoming data before calling the model
-        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
-            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
-        )
-
         tasks = []
         tasks.append(
             proxy_logging_obj.during_call_hook(
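To make the new ordering concrete, here is a minimal, self-contained sketch of the flow these two hunks produce; RejectingHook and handle_chat_completion are hypothetical stand-ins, not the proxy's actual call graph. The point is that the pre-call hook now runs before any logging object is attached, so a rejection can arrive while litellm_logging_obj is still unset.

import asyncio

from fastapi import HTTPException


class RejectingHook:
    """Hypothetical stand-in for a pre-call hook that rejects every request."""

    async def pre_call_hook(self, data: dict, call_type: str) -> dict:
        raise HTTPException(
            status_code=429, detail={"error": "Max parallel request limit reached"}
        )


async def handle_chat_completion(data: dict, hooks: RejectingHook) -> dict:
    # 1. Hooks run first and may modify or reject the incoming data.
    data = await hooks.pre_call_hook(data=data, call_type="completion")
    # 2. Only requests that pass the hooks get a logging object attached.
    data["litellm_logging_obj"] = object()  # placeholder for the real logging object
    return data


if __name__ == "__main__":
    try:
        asyncio.run(handle_chat_completion({"model": "fake-model"}, RejectingHook()))
    except HTTPException as exc:
        # The rejection happened before any logging object existed.
        print("rejected before logging object was created:", exc.detail)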

@@ -601,10 +601,14 @@ class ProxyLogging:
             "litellm_logging_obj", None
         )
 
-        if (
-            isinstance(original_exception, HTTPException)
-            and litellm_logging_obj is not None
-        ):
+        if isinstance(original_exception, HTTPException):
+            if litellm_logging_obj is None:
+                litellm_logging_obj, data = litellm.utils.function_setup(
+                    original_function="IGNORE_THIS",
+                    rules_obj=litellm.utils.Rules(),
+                    start_time=datetime.now(),
+                    **request_data,
+                )
             # log the custom exception
             await litellm_logging_obj.async_failure_handler(
                 exception=original_exception,
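The same idea for the failure path, again as a sketch with hypothetical stand-ins (FailureLogger, post_call_failure) rather than the real ProxyLogging internals: if the exception arrives while no logging object exists yet, build one on the spot so the rejected request is still recorded as a failure; the real code does this via litellm.utils.function_setup with original_function="IGNORE_THIS".

import asyncio
from datetime import datetime

from fastapi import HTTPException


class FailureLogger:
    """Hypothetical stand-in for the logging object that records failure events."""

    def __init__(self, created_at: datetime):
        self.created_at = created_at

    async def async_failure_handler(self, exception: Exception) -> None:
        print(f"failure logged at {self.created_at.isoformat()}: {exception}")


async def post_call_failure(original_exception: Exception, request_data: dict) -> None:
    logging_obj = request_data.get("litellm_logging_obj", None)
    if isinstance(original_exception, HTTPException):
        if logging_obj is None:
            # Rejected before a logging object existed: create one now so the
            # failure is still logged (stand-in for the function_setup call above).
            logging_obj = FailureLogger(created_at=datetime.now())
        await logging_obj.async_failure_handler(exception=original_exception)


if __name__ == "__main__":
    rejected = HTTPException(status_code=429, detail={"error": "rejected"})
    asyncio.run(post_call_failure(rejected, request_data={"model": "fake-model"}))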

@@ -0,0 +1,172 @@
# What is this?
## Unit test that rejected requests are also logged as failures

import asyncio
import os
import random
import sys
import time
import traceback
from datetime import datetime

from dotenv import load_dotenv

load_dotenv()

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import pytest
from fastapi import Request, Response
from starlette.datastructures import URL

import litellm
from litellm import Router, mock_completion
from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (
    _ENTERPRISE_SecretDetection,
)
from litellm.proxy.proxy_server import (
    Depends,
    HTTPException,
    chat_completion,
    completion,
    embeddings,
)
from litellm.proxy.utils import ProxyLogging, hash_token
from litellm.router import Router


class testLogger(CustomLogger):
    def __init__(self):
        self.reaches_failure_event = False

    async def async_pre_call_check(self, deployment: dict):
        raise HTTPException(
            status_code=429, detail={"error": "Max parallel request limit reached"}
        )

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        self.reaches_failure_event = True
        return await super().async_log_failure_event(
            kwargs, response_obj, start_time, end_time
        )


router = Router(
    model_list=[
        {
            "model_name": "fake-model",
            "litellm_params": {
                "model": "openai/fake",
                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
                "api_key": "sk-12345",
            },
        }
    ]
)


@pytest.mark.parametrize(
    "route, body",
    [
        (
            "/v1/chat/completions",
            {
                "model": "fake-model",
                "messages": [
                    {
                        "role": "user",
                        "content": "Hello here is my OPENAI_API_KEY = sk-12345",
                    }
                ],
            },
        ),
        # ("/v1/completions", {"model": "fake-model", "prompt": "ping"}),
        # (
        #     "/v1/embeddings",
        #     {
        #         "input": "The food was delicious and the waiter...",
        #         "model": "text-embedding-ada-002",
        #         "encoding_format": "float",
        #     },
        # ),
    ],
)
@pytest.mark.asyncio
async def test_chat_completion_request_with_redaction(route, body):
    """
    IMPORTANT Enterprise Test - Do not delete it:
    Makes a /chat/completions request on LiteLLM Proxy and ensures that a request
    rejected by a pre-call check is still logged as a failure on the callback.
    """
    from litellm.proxy import proxy_server

    setattr(proxy_server, "llm_router", router)
    _test_logger = testLogger()
    litellm.callbacks = [_test_logger]
    litellm.set_verbose = True

    # Prepare the query string
    query_params = "param1=value1&param2=value2"

    # Create the Request object with query parameters
    request = Request(
        scope={
            "type": "http",
            "method": "POST",
            "headers": [(b"content-type", b"application/json")],
            "query_string": query_params.encode(),
        }
    )
    request._url = URL(url=route)

    async def return_body():
        import json

        return json.dumps(body).encode()

    request.body = return_body

    try:
        if route == "/v1/chat/completions":
            response = await chat_completion(
                request=request,
                user_api_key_dict=UserAPIKeyAuth(
                    api_key="sk-12345", token="hashed_sk-12345", rpm_limit=0
                ),
                fastapi_response=Response(),
            )
        elif route == "/v1/completions":
            response = await completion(
                request=request,
                user_api_key_dict=UserAPIKeyAuth(
                    api_key="sk-12345", token="hashed_sk-12345", rpm_limit=0
                ),
                fastapi_response=Response(),
            )
        elif route == "/v1/embeddings":
            response = await embeddings(
                request=request,
                user_api_key_dict=UserAPIKeyAuth(
                    api_key="sk-12345", token="hashed_sk-12345", rpm_limit=0
                ),
                fastapi_response=Response(),
            )
    except Exception:
        # The request is expected to be rejected; the assertion below checks the callback.
        pass

    await asyncio.sleep(3)

    assert _test_logger.reaches_failure_event is True
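A note on how the new test exercises this path: testLogger.async_pre_call_check raises a 429 HTTPException, so the request is rejected before any model call, and the assertion checks that async_log_failure_event still fired, which is exactly what the on-demand logging-object creation makes possible. The /v1/completions and /v1/embeddings cases are left commented out in the parametrization, so only the chat-completions route is covered for now.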