diff --git a/docs/my-website/docs/proxy/enterprise.md b/docs/my-website/docs/proxy/enterprise.md
index a4f3ea7b1..e0c5374f0 100644
--- a/docs/my-website/docs/proxy/enterprise.md
+++ b/docs/my-website/docs/proxy/enterprise.md
@@ -12,14 +12,16 @@ Features here are behind a commercial license in our `/enterprise` folder. [**Se
 :::

 Features:
-- [ ] Content Moderation with LlamaGuard
-- [ ] Content Moderation with Google Text Moderations
-- [ ] Content Moderation with LLM Guard
-- [ ] Reject calls from Blocked User list
-- [ ] Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
-- [ ] Tracking Spend for Custom Tags
+- ✅ Content Moderation with LlamaGuard
+- ✅ Content Moderation with Google Text Moderations
+- ✅ Content Moderation with LLM Guard
+- ✅ Reject calls from Blocked User list
+- ✅ Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
+- ✅ Don't log/store specific requests (eg confidential LLM requests)
+- ✅ Tracking Spend for Custom Tags

-## Content Moderation with LlamaGuard
+## Content Moderation
+### Content Moderation with LlamaGuard

 Currently works with Sagemaker's LlamaGuard endpoint.

@@ -39,7 +41,7 @@ os.environ["AWS_SECRET_ACCESS_KEY"] = ""
 os.environ["AWS_REGION_NAME"] = ""
 ```

-### Customize LlamaGuard prompt
+#### Customize LlamaGuard prompt

 To modify the unsafe categories llama guard evaluates against, just create your own version of [this category list](https://github.com/BerriAI/litellm/blob/main/litellm/proxy/llamaguard_prompt.txt)

@@ -51,7 +53,7 @@ callbacks: ["llamaguard_moderations"]
   llamaguard_unsafe_content_categories: /path/to/llamaguard_prompt.txt
 ```

-## Content Moderation with LLM Guard
+### Content Moderation with LLM Guard

 Set the LLM Guard API Base in your environment

@@ -78,7 +80,7 @@ Expected results:
 LLM Guard: Received response - {"sanitized_prompt": "hello world", "is_valid": true, "scanners": { "Regex": 0.0 }}
 ```

-## Content Moderation with Google Text Moderation
+### Content Moderation with Google Text Moderation

 Requires your GOOGLE_APPLICATION_CREDENTIALS to be set in your .env (same as VertexAI).

@@ -89,7 +91,7 @@ litellm_settings:
   callbacks: ["google_text_moderation"]
 ```

-### Set custom confidence thresholds
+#### Set custom confidence thresholds

 Google Moderations checks the test against several categories. [Source](https://cloud.google.com/natural-language/docs/moderating-text#safety_attribute_confidence_scores)

@@ -133,6 +135,33 @@ Here are the category specific values:
 | "legal" | legal_threshold: 0.1 |


+## Incognito Requests - Don't log anything
+
+When `no-log=True`, the request will **not be logged on any callbacks** and there will be **no server logs on litellm**
+
+```python
+import openai
+client = openai.OpenAI(
+    api_key="anything",            # proxy api-key
+    base_url="http://0.0.0.0:8000" # litellm proxy
+)
+
+response = client.chat.completions.create(
+    model="gpt-3.5-turbo",
+    messages = [
+        {
+            "role": "user",
+            "content": "this is a test request, write a short poem"
+        }
+    ],
+    extra_body={
+        "no-log": True
+    }
+)
+
+print(response)
+```
+
 ## Enable Blocked User Lists
 If any call is made to proxy with this user id, it'll be rejected - use this if you want to let users opt-out of ai features

diff --git a/litellm/_logging.py b/litellm/_logging.py
index 438fa9743..26693c15e 100644
--- a/litellm/_logging.py
+++ b/litellm/_logging.py
@@ -31,6 +31,18 @@ def _turn_on_debug():
     verbose_proxy_logger.setLevel(level=logging.DEBUG)  # set proxy logs to debug


+def _disable_debugging():
+    verbose_logger.disabled = True
+    verbose_router_logger.disabled = True
+    verbose_proxy_logger.disabled = True
+
+
+def _enable_debugging():
+    verbose_logger.disabled = False
+    verbose_router_logger.disabled = False
+    verbose_proxy_logger.disabled = False
+
+
 def print_verbose(print_statement):
     try:
         if set_verbose:
diff --git a/litellm/main.py b/litellm/main.py
index 63649844a..c8a8dbc1c 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -487,6 +487,8 @@ def completion(
     ### ASYNC CALLS ###
     acompletion = kwargs.get("acompletion", False)
     client = kwargs.get("client", None)
+    ### Admin Controls ###
+    no_log = kwargs.get("no-log", False)
     ######## end of unpacking kwargs ###########
     openai_params = [
         "functions",
@@ -563,6 +565,7 @@ def completion(
         "caching_groups",
         "ttl",
         "cache",
+        "no-log",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {
@@ -726,6 +729,7 @@ def completion(
             model_info=model_info,
             proxy_server_request=proxy_server_request,
             preset_cache_key=preset_cache_key,
+            no_log=no_log,
         )
         logging.update_environment_variables(
             model=model,
@@ -2417,6 +2421,7 @@ def embedding(
         "caching_groups",
         "ttl",
         "cache",
+        "no-log",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 654a50b2f..76c9ed04c 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -5,12 +5,9 @@ model_list:
       api_base: os.environ/AZURE_API_BASE
       api_key: os.environ/AZURE_API_KEY
       api_version: "2023-07-01-preview"
-  - model_name: azure-gpt-3.5
-    litellm_params:
-      model: gpt-3.5-turbo
-      api_key: os.environ/OPENAI_API_KEY
-    model_info:
-      access_groups: ["public"]
+litellm_settings:
+  set_verbose: True
+  success_callback: ["langfuse"]
 router_settings:
   set_verbose: True
   debug_level: "DEBUG"
\ No newline at end of file
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index d9642b810..67d4f442b 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1675,9 +1675,9 @@ class ProxyConfig:
                         # these are litellm callbacks - "langfuse", "sentry", "wandb"
                         else:
                             litellm.success_callback.append(callback)
-                    verbose_proxy_logger.debug(
+                    print(  # noqa
                         f"{blue_color_code} Initialized Success Callbacks - {litellm.success_callback} {reset_color_code}"
-                    )
+                    )  # noqa
elif key == "failure_callback": litellm.failure_callback = [] diff --git a/litellm/utils.py b/litellm/utils.py index 38836a4bc..32f0f765b 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1279,6 +1279,15 @@ class Logging: for callback in callbacks: try: + litellm_params = self.model_call_details.get("litellm_params", {}) + if litellm_params.get("no-log", False) == True: + # proxy cost tracking cal backs should run + if not ( + isinstance(callback, CustomLogger) + and "_PROXY_" in callback.__class__.__name__ + ): + print_verbose("no-log request, skipping logging") + continue if callback == "lite_debugger": print_verbose("reaches lite_debugger for logging!") print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") @@ -1707,7 +1716,20 @@ class Logging: callbacks = litellm._async_success_callback verbose_logger.debug(f"Async success callbacks: {callbacks}") for callback in callbacks: + # check if callback can run for this request + litellm_params = self.model_call_details.get("litellm_params", {}) + if litellm_params.get("no-log", False) == True: + # proxy cost tracking cal backs should run + if not ( + isinstance(callback, CustomLogger) + and "_PROXY_" in callback.__class__.__name__ + ): + print_verbose("no-log request, skipping logging") + continue try: + if kwargs.get("no-log", False) == True: + print_verbose("no-log request, skipping logging") + continue if callback == "cache" and litellm.cache is not None: # set_cache once complete streaming response is built print_verbose("async success_callback: reaches cache for logging!") @@ -2985,11 +3007,13 @@ def client(original_function): print_verbose( f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}" ) + # check if user does not want this to be logged asyncio.create_task( logging_obj.async_success_handler(result, start_time, end_time) ) threading.Thread( - target=logging_obj.success_handler, args=(result, start_time, end_time) + target=logging_obj.success_handler, + args=(result, start_time, end_time), ).start() # RETURN RESULT @@ -3892,6 +3916,7 @@ def get_litellm_params( proxy_server_request=None, acompletion=None, preset_cache_key=None, + no_log=None, ): litellm_params = { "acompletion": acompletion, @@ -3908,6 +3933,7 @@ def get_litellm_params( "model_info": model_info, "proxy_server_request": proxy_server_request, "preset_cache_key": preset_cache_key, + "no-log": no_log, "stream_response": {}, # litellm_call_id: ModelResponse Dict }