fix(proxy_server.py): fix callback check order

Krrish Dholakia 2024-07-05 14:04:28 -07:00
parent 9c6080be1a
commit d528b66db0
3 changed files with 185 additions and 9 deletions
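In summary (as read from the diffs below): the ### CALL HOOKS ### block in chat_completion moves ahead of the logging-object initialization, ProxyLogging's failure path now constructs a logging object on demand via litellm.utils.function_setup when a request is rejected before one exists, and a new unit test asserts that such rejected requests still reach the failure callbacks.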

@@ -2753,6 +2753,11 @@ async def chat_completion(
         if isinstance(data["model"], str) and data["model"] in litellm.model_alias_map:
             data["model"] = litellm.model_alias_map[data["model"]]
 
+        ### CALL HOOKS ### - modify/reject incoming data before calling the model
+        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
+            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
+        )
+
         ## LOGGING OBJECT ## - initialize logging object for logging success/failure events for call
         ## IMPORTANT Note: - initialize this before running pre-call checks. Ensures we log rejected requests to langfuse.
         data["litellm_call_id"] = str(uuid.uuid4())
@@ -2765,11 +2770,6 @@ async def chat_completion(
         data["litellm_logging_obj"] = logging_obj
 
-        ### CALL HOOKS ### - modify/reject incoming data before calling the model
-        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
-            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
-        )
-
         tasks = []
         tasks.append(
             proxy_logging_obj.during_call_hook(
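To make the new ordering concrete, here is a minimal, self-contained sketch of the flow these two hunks produce; RejectingHook and handle_chat_completion are hypothetical stand-ins, not the proxy's actual call graph. The point is that the pre-call hook now runs before any logging object is attached, so a rejection can arrive while litellm_logging_obj is still unset.

import asyncio

from fastapi import HTTPException


class RejectingHook:
    """Hypothetical stand-in for a pre-call hook that rejects every request."""

    async def pre_call_hook(self, data: dict, call_type: str) -> dict:
        raise HTTPException(
            status_code=429, detail={"error": "Max parallel request limit reached"}
        )


async def handle_chat_completion(data: dict, hooks: RejectingHook) -> dict:
    # 1. Hooks run first and may modify or reject the incoming data.
    data = await hooks.pre_call_hook(data=data, call_type="completion")
    # 2. Only requests that pass the hooks get a logging object attached.
    data["litellm_logging_obj"] = object()  # placeholder for the real logging object
    return data


if __name__ == "__main__":
    try:
        asyncio.run(handle_chat_completion({"model": "fake-model"}, RejectingHook()))
    except HTTPException as exc:
        # The rejection happened before any logging object existed.
        print("rejected before logging object was created:", exc.detail)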

@@ -601,10 +601,14 @@ class ProxyLogging:
             "litellm_logging_obj", None
         )
 
-        if (
-            isinstance(original_exception, HTTPException)
-            and litellm_logging_obj is not None
-        ):
+        if isinstance(original_exception, HTTPException):
+            if litellm_logging_obj is None:
+                litellm_logging_obj, data = litellm.utils.function_setup(
+                    original_function="IGNORE_THIS",
+                    rules_obj=litellm.utils.Rules(),
+                    start_time=datetime.now(),
+                    **request_data,
+                )
             # log the custom exception
             await litellm_logging_obj.async_failure_handler(
                 exception=original_exception,
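The same idea for the failure path, again as a sketch with hypothetical stand-ins (FailureLogger, post_call_failure) rather than the real ProxyLogging internals: if the exception arrives while no logging object exists yet, build one on the spot so the rejected request is still recorded as a failure; the real code does this via litellm.utils.function_setup with original_function="IGNORE_THIS".

import asyncio
from datetime import datetime

from fastapi import HTTPException


class FailureLogger:
    """Hypothetical stand-in for the logging object that records failure events."""

    def __init__(self, created_at: datetime):
        self.created_at = created_at

    async def async_failure_handler(self, exception: Exception) -> None:
        print(f"failure logged at {self.created_at.isoformat()}: {exception}")


async def post_call_failure(original_exception: Exception, request_data: dict) -> None:
    logging_obj = request_data.get("litellm_logging_obj", None)
    if isinstance(original_exception, HTTPException):
        if logging_obj is None:
            # Rejected before a logging object existed: create one now so the
            # failure is still logged (stand-in for the function_setup call above).
            logging_obj = FailureLogger(created_at=datetime.now())
        await logging_obj.async_failure_handler(exception=original_exception)


if __name__ == "__main__":
    rejected = HTTPException(status_code=429, detail={"error": "rejected"})
    asyncio.run(post_call_failure(rejected, request_data={"model": "fake-model"}))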

@@ -0,0 +1,172 @@
# What is this?
## Unit test that rejected requests are also logged as failures

import asyncio
import os
import random
import sys
import time
import traceback
from datetime import datetime

from dotenv import load_dotenv

load_dotenv()

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import pytest
from fastapi import Request, Response
from starlette.datastructures import URL

import litellm
from litellm import Router, mock_completion
from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (
    _ENTERPRISE_SecretDetection,
)
from litellm.proxy.proxy_server import (
    Depends,
    HTTPException,
    chat_completion,
    completion,
    embeddings,
)
from litellm.proxy.utils import ProxyLogging, hash_token
from litellm.router import Router


class testLogger(CustomLogger):
    def __init__(self):
        self.reaches_failure_event = False

    async def async_pre_call_check(self, deployment: dict):
        raise HTTPException(
            status_code=429, detail={"error": "Max parallel request limit reached"}
        )

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        self.reaches_failure_event = True
        return await super().async_log_failure_event(
            kwargs, response_obj, start_time, end_time
        )


router = Router(
    model_list=[
        {
            "model_name": "fake-model",
            "litellm_params": {
                "model": "openai/fake",
                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
                "api_key": "sk-12345",
            },
        }
    ]
)


@pytest.mark.parametrize(
    "route, body",
    [
        (
            "/v1/chat/completions",
            {
                "model": "fake-model",
                "messages": [
                    {
                        "role": "user",
                        "content": "Hello here is my OPENAI_API_KEY = sk-12345",
                    }
                ],
            },
        ),
        # ("/v1/completions", {"model": "fake-model", "prompt": "ping"}),
        # (
        #     "/v1/embeddings",
        #     {
        #         "input": "The food was delicious and the waiter...",
        #         "model": "text-embedding-ada-002",
        #         "encoding_format": "float",
        #     },
        # ),
    ],
)
@pytest.mark.asyncio
async def test_chat_completion_request_with_redaction(route, body):
    """
    IMPORTANT Enterprise Test - Do not delete it:
    Makes a /chat/completions request on LiteLLM Proxy and ensures that a request
    rejected by a pre-call check is still logged as a failure on the callback.
    """
    from litellm.proxy import proxy_server

    setattr(proxy_server, "llm_router", router)
    _test_logger = testLogger()
    litellm.callbacks = [_test_logger]
    litellm.set_verbose = True

    # Prepare the query string
    query_params = "param1=value1&param2=value2"

    # Create the Request object with query parameters
    request = Request(
        scope={
            "type": "http",
            "method": "POST",
            "headers": [(b"content-type", b"application/json")],
            "query_string": query_params.encode(),
        }
    )
    request._url = URL(url=route)

    async def return_body():
        import json

        return json.dumps(body).encode()

    request.body = return_body

    try:
        if route == "/v1/chat/completions":
            response = await chat_completion(
                request=request,
                user_api_key_dict=UserAPIKeyAuth(
                    api_key="sk-12345", token="hashed_sk-12345", rpm_limit=0
                ),
                fastapi_response=Response(),
            )
        elif route == "/v1/completions":
            response = await completion(
                request=request,
                user_api_key_dict=UserAPIKeyAuth(
                    api_key="sk-12345", token="hashed_sk-12345", rpm_limit=0
                ),
                fastapi_response=Response(),
            )
        elif route == "/v1/embeddings":
            response = await embeddings(
                request=request,
                user_api_key_dict=UserAPIKeyAuth(
                    api_key="sk-12345", token="hashed_sk-12345", rpm_limit=0
                ),
                fastapi_response=Response(),
            )
    except Exception:
        # The request is expected to be rejected; the assertion below checks the callback.
        pass

    await asyncio.sleep(3)

    assert _test_logger.reaches_failure_event is True
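A note on how the new test exercises this path: testLogger.async_pre_call_check raises a 429 HTTPException, so the request is rejected before any model call, and the assertion checks that async_log_failure_event still fired, which is exactly what the on-demand logging-object creation makes possible. The /v1/completions and /v1/embeddings cases are left commented out in the parametrization, so only the chat-completions route is covered for now.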