diff --git a/docs/my-website/docs/proxy/call_hooks.md b/docs/my-website/docs/proxy/call_hooks.md
index ee49e395f3..63d215764c 100644
--- a/docs/my-website/docs/proxy/call_hooks.md
+++ b/docs/my-website/docs/proxy/call_hooks.md
@@ -1,6 +1,7 @@
 # Modify / Reject Incoming Requests
 
-Modify data just before making litellm completion calls call on proxy
+- Modify data before making LLM API calls on proxy
+- Reject data before making LLM API calls / before returning the response
 
 See a complete example with our [parallel request rate limiter](https://github.com/BerriAI/litellm/blob/main/litellm/proxy/hooks/parallel_request_limiter.py)
 
@@ -76,3 +77,93 @@ curl --location 'http://0.0.0.0:8000/chat/completions' \
     }'
 ```

## *NEW* async_moderation_hook

Run a moderation check in parallel to the actual LLM API call.

1. In your Custom Handler, add a new `async_moderation_hook` function

- This is currently only supported for `/chat/completions` calls.
- This function runs in parallel to the actual LLM API call (see the sketch after the handler below).
- If your `async_moderation_hook` raises an Exception, we return that error to the user instead of the LLM response.

See a complete example with our [Llama Guard content moderation hook](https://github.com/BerriAI/litellm/blob/main/enterprise/hooks/llama_guard.py)

```python
from typing import Literal

from fastapi import HTTPException

import litellm
from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth

# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
class MyCustomHandler(CustomLogger):  # https://docs.litellm.ai/docs/observability/custom_callback#callback-class
    # Class variables or attributes
    def __init__(self):
        pass

    #### ASYNC ####

    async def async_log_stream_event(self, kwargs, response_obj, start_time, end_time):
        pass

    async def async_log_pre_api_call(self, model, messages, kwargs):
        pass

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        pass

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        pass

    #### CALL HOOKS - proxy only ####

    async def async_pre_call_hook(self, user_api_key_dict: UserAPIKeyAuth, cache: DualCache, data: dict, call_type: Literal["completion", "embeddings"]):
        data["model"] = "my-new-model"
        return data

    async def async_moderation_hook(  ### 👈 KEY CHANGE ###
        self,
        data: dict,
    ):
        messages = data["messages"]
        print(messages)
        if messages[0]["content"] == "hello world":
            raise HTTPException(
                status_code=400, detail={"error": "Violated content safety policy"}
            )

proxy_handler_instance = MyCustomHandler()
```
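To make the "runs in parallel" behavior concrete, here is a minimal sketch of how a proxy *could* fan out the moderation hook and the LLM call concurrently. This is illustrative only — `fake_llm_call` and `call_with_moderation` are hypothetical names, not LiteLLM's actual internals.

```python
import asyncio

from fastapi import HTTPException

# Hypothetical stand-in for the real LLM API call the proxy makes.
async def fake_llm_call(data: dict) -> dict:
    await asyncio.sleep(0.1)  # simulate network latency
    return {"choices": [{"message": {"role": "assistant", "content": "Hi!"}}]}

async def call_with_moderation(handler, data: dict) -> dict:
    # Start the LLM call first, then run the moderation hook alongside it.
    llm_task = asyncio.create_task(fake_llm_call(data))
    try:
        await handler.async_moderation_hook(data=data)
    except HTTPException:
        # The hook rejected the request: drop the in-flight completion
        # and surface the moderation error to the user instead.
        llm_task.cancel()
        raise
    return await llm_task

# Example (assumes MyCustomHandler from step 1 is importable):
# asyncio.run(call_with_moderation(
#     MyCustomHandler(),
#     {"messages": [{"role": "user", "content": "hello world"}]},
# ))  # raises HTTPException(400, ...) because of the "hello world" check
```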
2. Add this file to your proxy config

```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo

litellm_settings:
  callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
```

3. Start the server + test the request

```shell
$ litellm --config /path/to/config.yaml
```

```shell
curl --location 'http://0.0.0.0:8000/chat/completions' \
    --header 'Content-Type: application/json' \
    --data '{
        "model": "gpt-3.5-turbo",
        "messages": [
            {
                "role": "user",
                "content": "hello world"
            }
        ]
    }'
```
\ No newline at end of file
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index abdb4bc717..83b4099691 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -138,7 +138,7 @@ const sidebars = {
       },
       {
         "type": "category",
-        "label": "Admin Controls",
+        "label": "Content Moderation",
         "items": [
           "proxy/call_hooks",
           "proxy/rules",
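As a companion to step 3 above, here is a small sketch that drives the same test from Python using the OpenAI SDK pointed at the proxy. The base URL and model come from the example above; the placeholder API key assumes the proxy isn't enforcing key auth.

```python
import openai

# Point the OpenAI client at the LiteLLM proxy started in step 3.
# The api_key is a placeholder -- this sketch assumes no key auth is enforced.
client = openai.OpenAI(base_url="http://0.0.0.0:8000", api_key="sk-anything")

try:
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello world"}],
    )
    print(response.choices[0].message.content)
except openai.BadRequestError as e:
    # "hello world" trips async_moderation_hook, so we expect a 400 with
    # detail {"error": "Violated content safety policy"}.
    print(f"Request rejected: {e}")
```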