Merge pull request #2408 from BerriAI/litellm_no_store_reqs

[FEAT-liteLLM Proxy] Incognito Requests - Don't log anything
Ishaan Jaff 2024-03-08 21:11:43 -08:00 committed by GitHub
commit 8036b48f14
6 changed files with 89 additions and 20 deletions

@@ -12,14 +12,16 @@ Features here are behind a commercial license in our `/enterprise` folder. [**Se
:::

Features:
-- [ ] Content Moderation with LlamaGuard
-- [ ] Content Moderation with Google Text Moderations
-- [ ] Content Moderation with LLM Guard
-- [ ] Reject calls from Blocked User list
-- [ ] Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
-- [ ] Tracking Spend for Custom Tags
+- ✅ Content Moderation with LlamaGuard
+- ✅ Content Moderation with Google Text Moderations
+- ✅ Content Moderation with LLM Guard
+- ✅ Reject calls from Blocked User list
+- ✅ Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
+- ✅ Don't log/store specific requests (eg confidential LLM requests)
+- ✅ Tracking Spend for Custom Tags

-## Content Moderation with LlamaGuard
+## Content Moderation
+### Content Moderation with LlamaGuard

Currently works with Sagemaker's LlamaGuard endpoint.
@@ -39,7 +41,7 @@ os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""
```

-### Customize LlamaGuard prompt
+#### Customize LlamaGuard prompt

To modify the unsafe categories llama guard evaluates against, just create your own version of [this category list](https://github.com/BerriAI/litellm/blob/main/litellm/proxy/llamaguard_prompt.txt)
@@ -51,7 +53,7 @@ callbacks: ["llamaguard_moderations"]
  llamaguard_unsafe_content_categories: /path/to/llamaguard_prompt.txt
```

-## Content Moderation with LLM Guard
+### Content Moderation with LLM Guard

Set the LLM Guard API Base in your environment
@@ -78,7 +80,7 @@ Expected results:
LLM Guard: Received response - {"sanitized_prompt": "hello world", "is_valid": true, "scanners": { "Regex": 0.0 }}
```

-## Content Moderation with Google Text Moderation
+### Content Moderation with Google Text Moderation

Requires your GOOGLE_APPLICATION_CREDENTIALS to be set in your .env (same as VertexAI).
@@ -89,7 +91,7 @@ litellm_settings:
  callbacks: ["google_text_moderation"]
```

-### Set custom confidence thresholds
+#### Set custom confidence thresholds

Google Moderations checks the test against several categories. [Source](https://cloud.google.com/natural-language/docs/moderating-text#safety_attribute_confidence_scores)
@@ -133,6 +135,33 @@ Here are the category specific values:
| "legal" | legal_threshold: 0.1 |

+## Incognito Requests - Don't log anything
+
+When `no-log=True`, the request will **not be logged on any callbacks** and there will be **no server logs on litellm**
+
+```python
+import openai
+
+client = openai.OpenAI(
+    api_key="anything",             # proxy api-key
+    base_url="http://0.0.0.0:8000"  # litellm proxy
+)
+
+response = client.chat.completions.create(
+    model="gpt-3.5-turbo",
+    messages=[
+        {
+            "role": "user",
+            "content": "this is a test request, write a short poem"
+        }
+    ],
+    extra_body={
+        "no-log": True
+    }
+)
+
+print(response)
+```

## Enable Blocked User Lists
If any call is made to proxy with this user id, it'll be rejected - use this if you want to let users opt-out of ai features

@@ -31,6 +31,18 @@ def _turn_on_debug():
    verbose_proxy_logger.setLevel(level=logging.DEBUG)  # set proxy logs to debug

+def _disable_debugging():
+    verbose_logger.disabled = True
+    verbose_router_logger.disabled = True
+    verbose_proxy_logger.disabled = True
+
+
+def _enable_debugging():
+    verbose_logger.disabled = False
+    verbose_router_logger.disabled = False
+    verbose_proxy_logger.disabled = False
+
+
def print_verbose(print_statement):
    try:
        if set_verbose:
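
A minimal usage sketch for the new debugging toggles, not taken from the diff itself. It assumes the helpers are importable from litellm's internal `_logging` module, since the changed file's path is not shown here:

```python
# Hypothetical usage sketch: assumes _disable_debugging / _enable_debugging are
# exposed from litellm's internal _logging module (adjust the import if not).
from litellm._logging import _disable_debugging, _enable_debugging

_disable_debugging()  # silences verbose_logger, verbose_router_logger and verbose_proxy_logger
try:
    ...  # make calls that should produce no litellm server logs
finally:
    _enable_debugging()  # re-enable all three loggers afterwards
```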

@@ -487,6 +487,8 @@ def completion(
    ### ASYNC CALLS ###
    acompletion = kwargs.get("acompletion", False)
    client = kwargs.get("client", None)
+    ### Admin Controls ###
+    no_log = kwargs.get("no-log", False)
    ######## end of unpacking kwargs ###########
    openai_params = [
        "functions",

@@ -563,6 +565,7 @@ def completion(
        "caching_groups",
        "ttl",
        "cache",
+        "no-log",
    ]
    default_params = openai_params + litellm_params
    non_default_params = {

@@ -726,6 +729,7 @@ def completion(
        model_info=model_info,
        proxy_server_request=proxy_server_request,
        preset_cache_key=preset_cache_key,
+        no_log=no_log,
    )
    logging.update_environment_variables(
        model=model,

@@ -2417,6 +2421,7 @@ def embedding(
        "caching_groups",
        "ttl",
        "cache",
+        "no-log",
    ]
    default_params = openai_params + litellm_params
    non_default_params = {
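
The hunks above make `completion()` and `embedding()` read a `no-log` kwarg and thread it through as a litellm param, so the flag can presumably also be passed when calling the SDK directly rather than through the proxy. A minimal sketch under that assumption (it expects an `OPENAI_API_KEY` in the environment):

```python
# Sketch of direct SDK usage based on the kwargs handling above. The "no-log"
# key is not a valid Python identifier, so it is passed via **kwargs.
import litellm

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
    **{"no-log": True},  # ask litellm to skip logging callbacks for this call
)
print(response)
```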

@@ -5,12 +5,9 @@ model_list:
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      api_version: "2023-07-01-preview"
-  - model_name: azure-gpt-3.5
-    litellm_params:
-      model: gpt-3.5-turbo
-      api_key: os.environ/OPENAI_API_KEY
-    model_info:
-      access_groups: ["public"]
+litellm_settings:
+  set_verbose: True
+  success_callback: ["langfuse"]

router_settings:
  set_verbose: True
  debug_level: "DEBUG"

@@ -1675,9 +1675,9 @@ class ProxyConfig:
                        # these are litellm callbacks - "langfuse", "sentry", "wandb"
                        else:
                            litellm.success_callback.append(callback)
-                    verbose_proxy_logger.debug(
+                    print(  # noqa
                        f"{blue_color_code} Initialized Success Callbacks - {litellm.success_callback} {reset_color_code}"
-                    )
+                    )  # noqa
                elif key == "failure_callback":
                    litellm.failure_callback = []

@@ -1279,6 +1279,15 @@ class Logging:
        for callback in callbacks:
            try:
+                litellm_params = self.model_call_details.get("litellm_params", {})
+                if litellm_params.get("no-log", False) == True:
+                    # proxy cost tracking callbacks should run
+                    if not (
+                        isinstance(callback, CustomLogger)
+                        and "_PROXY_" in callback.__class__.__name__
+                    ):
+                        print_verbose("no-log request, skipping logging")
+                        continue
                if callback == "lite_debugger":
                    print_verbose("reaches lite_debugger for logging!")
                    print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")

@@ -1707,7 +1716,20 @@ class Logging:
        callbacks = litellm._async_success_callback
        verbose_logger.debug(f"Async success callbacks: {callbacks}")
        for callback in callbacks:
+            # check if callback can run for this request
+            litellm_params = self.model_call_details.get("litellm_params", {})
+            if litellm_params.get("no-log", False) == True:
+                # proxy cost tracking callbacks should run
+                if not (
+                    isinstance(callback, CustomLogger)
+                    and "_PROXY_" in callback.__class__.__name__
+                ):
+                    print_verbose("no-log request, skipping logging")
+                    continue
            try:
+                if kwargs.get("no-log", False) == True:
+                    print_verbose("no-log request, skipping logging")
+                    continue
                if callback == "cache" and litellm.cache is not None:
                    # set_cache once complete streaming response is built
                    print_verbose("async success_callback: reaches cache for logging!")
@@ -2985,11 +3007,13 @@ def client(original_function):
            print_verbose(
                f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
            )
+            # check if user does not want this to be logged
            asyncio.create_task(
                logging_obj.async_success_handler(result, start_time, end_time)
            )
            threading.Thread(
-                target=logging_obj.success_handler, args=(result, start_time, end_time)
+                target=logging_obj.success_handler,
+                args=(result, start_time, end_time),
            ).start()
            # RETURN RESULT

@@ -3892,6 +3916,7 @@ def get_litellm_params(
    proxy_server_request=None,
    acompletion=None,
    preset_cache_key=None,
+    no_log=None,
):
    litellm_params = {
        "acompletion": acompletion,

@@ -3908,6 +3933,7 @@
        "model_info": model_info,
        "proxy_server_request": proxy_server_request,
        "preset_cache_key": preset_cache_key,
+        "no-log": no_log,
        "stream_response": {},  # litellm_call_id: ModelResponse Dict
    }
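
To make the skip condition in these `Logging` handlers concrete, here is a small self-contained sketch of the same filter, using stand-in classes rather than LiteLLM's real ones: when `no-log` is set, only callbacks whose class name contains `_PROXY_` (proxy cost tracking) are allowed to run.

```python
# Standalone illustration of the no-log filter; CustomLogger and the two
# callback classes below are stand-ins, not litellm's actual implementations.
class CustomLogger:
    def log_event(self, kwargs, response):
        print(f"{self.__class__.__name__} logged the request")

class _PROXY_CostTracker(CustomLogger):   # proxy cost tracking, always runs
    pass

class LangfuseLogger(CustomLogger):       # regular logging callback, skipped on no-log
    pass

def run_success_callbacks(callbacks, litellm_params, kwargs, response):
    for callback in callbacks:
        if litellm_params.get("no-log", False) is True:
            # only proxy cost tracking callbacks may run for no-log requests
            if not (
                isinstance(callback, CustomLogger)
                and "_PROXY_" in callback.__class__.__name__
            ):
                print("no-log request, skipping logging")
                continue
        callback.log_event(kwargs, response)

run_success_callbacks(
    callbacks=[_PROXY_CostTracker(), LangfuseLogger()],
    litellm_params={"no-log": True},
    kwargs={},
    response={"choices": []},
)
# prints:
#   _PROXY_CostTracker logged the request
#   no-log request, skipping logging
```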