Merge pull request #2408 from BerriAI/litellm_no_store_reqs

[FEAT-liteLLM Proxy] Incognito Requests -  Don't log anything
Ishaan Jaff authored on 2024-03-08 21:11:43 -08:00, committed by GitHub
commit 8036b48f14
6 changed files with 89 additions and 20 deletions

View file

@ -12,14 +12,16 @@ Features here are behind a commercial license in our `/enterprise` folder. [**Se
:::
Features:
- [ ] Content Moderation with LlamaGuard
- [ ] Content Moderation with Google Text Moderations
- [ ] Content Moderation with LLM Guard
- [ ] Reject calls from Blocked User list
- [ ] Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
- [ ] Tracking Spend for Custom Tags
- ✅ Content Moderation with LlamaGuard
- ✅ Content Moderation with Google Text Moderations
- ✅ Content Moderation with LLM Guard
- ✅ Reject calls from Blocked User list
- ✅ Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
- ✅ Don't log/store specific requests (eg confidential LLM requests)
- ✅ Tracking Spend for Custom Tags
## Content Moderation with LlamaGuard
## Content Moderation
### Content Moderation with LlamaGuard
Currently works with Sagemaker's LlamaGuard endpoint.
@ -39,7 +41,7 @@ os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""
```
### Customize LlamaGuard prompt
#### Customize LlamaGuard prompt
To modify the unsafe categories LlamaGuard evaluates against, create your own version of [this category list](https://github.com/BerriAI/litellm/blob/main/litellm/proxy/llamaguard_prompt.txt)
@ -51,7 +53,7 @@ callbacks: ["llamaguard_moderations"]
llamaguard_unsafe_content_categories: /path/to/llamaguard_prompt.txt
```
## Content Moderation with LLM Guard
### Content Moderation with LLM Guard
Set the LLM Guard API Base in your environment
@ -78,7 +80,7 @@ Expected results:
LLM Guard: Received response - {"sanitized_prompt": "hello world", "is_valid": true, "scanners": { "Regex": 0.0 }}
```
## Content Moderation with Google Text Moderation
### Content Moderation with Google Text Moderation
Requires your GOOGLE_APPLICATION_CREDENTIALS to be set in your .env (same as VertexAI).
@ -89,7 +91,7 @@ litellm_settings:
callbacks: ["google_text_moderation"]
```
### Set custom confidence thresholds
#### Set custom confidence thresholds
Google Moderations checks the text against several categories. [Source](https://cloud.google.com/natural-language/docs/moderating-text#safety_attribute_confidence_scores)
@ -133,6 +135,33 @@ Here are the category specific values:
| "legal" | legal_threshold: 0.1 |
## Incognito Requests - Don't log anything
When `no-log=True` is set, the request will **not be logged to any callbacks** and there will be **no server logs from litellm** for that call.
```python
import openai

client = openai.OpenAI(
    api_key="anything",               # proxy api-key
    base_url="http://0.0.0.0:8000"    # litellm proxy
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "no-log": True
    }
)

print(response)
```
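Equivalently, any HTTP client can put the flag straight into the JSON body, since `extra_body` simply merges extra fields into the request payload. A hedged sketch using `requests` (the endpoint path assumes the proxy's standard OpenAI-compatible `/chat/completions` route):

```python
import requests

# Hedged sketch: send "no-log" directly in the JSON payload to the proxy
resp = requests.post(
    "http://0.0.0.0:8000/chat/completions",
    headers={"Authorization": "Bearer anything"},  # proxy api-key
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "this is a test request, write a short poem"}],
        "no-log": True,
    },
)
print(resp.json())
```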
## Enable Blocked User Lists
If any call is made to the proxy with this user id, it'll be rejected - use this if you want to let users opt out of AI features.
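For reference, a minimal client-side sketch of how a blocked id would surface, assuming the proxy identifies the caller via the standard OpenAI `user` field (the user id, model name, and error handling are illustrative placeholders, not values from this PR):

```python
import openai

client = openai.OpenAI(
    api_key="anything",              # proxy api-key
    base_url="http://0.0.0.0:8000"   # litellm proxy
)

try:
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
        user="user_id_1",  # placeholder id assumed to be on the blocked list
    )
except openai.APIError as e:
    # the proxy is expected to reject calls carrying a blocked user id
    print(f"request rejected: {e}")
```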

View file

@ -31,6 +31,18 @@ def _turn_on_debug():
    verbose_proxy_logger.setLevel(level=logging.DEBUG)  # set proxy logs to debug


def _disable_debugging():
    verbose_logger.disabled = True
    verbose_router_logger.disabled = True
    verbose_proxy_logger.disabled = True


def _enable_debugging():
    verbose_logger.disabled = False
    verbose_router_logger.disabled = False
    verbose_proxy_logger.disabled = False


def print_verbose(print_statement):
    try:
        if set_verbose:
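A minimal usage sketch for the new `_disable_debugging` / `_enable_debugging` toggles, assuming they are importable from litellm's internal `_logging` module (the import path and the surrounding code are illustrative, not part of this diff):

```python
# Hedged sketch: the import path is an assumption based on the helpers above
from litellm._logging import _disable_debugging, _enable_debugging

_disable_debugging()      # silence the litellm, router, and proxy loggers
try:
    run_noisy_batch()     # placeholder for whatever chatty work you want to quiet
finally:
    _enable_debugging()   # restore logging afterwards
```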

View file

@ -487,6 +487,8 @@ def completion(
### ASYNC CALLS ###
acompletion = kwargs.get("acompletion", False)
client = kwargs.get("client", None)
### Admin Controls ###
no_log = kwargs.get("no-log", False)
######## end of unpacking kwargs ###########
openai_params = [
"functions",
@ -563,6 +565,7 @@ def completion(
"caching_groups",
"ttl",
"cache",
"no-log",
]
default_params = openai_params + litellm_params
non_default_params = {
@ -726,6 +729,7 @@ def completion(
model_info=model_info,
proxy_server_request=proxy_server_request,
preset_cache_key=preset_cache_key,
no_log=no_log,
)
logging.update_environment_variables(
model=model,
@ -2417,6 +2421,7 @@ def embedding(
"caching_groups",
"ttl",
"cache",
"no-log",
]
default_params = openai_params + litellm_params
non_default_params = {
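Because the flag name contains a hyphen, it cannot be written as a normal keyword argument when calling the SDK directly; a hedged sketch passing it via dict unpacking (the model name is only an example):

```python
import litellm

# "no-log" has a hyphen, so pass it via ** unpacking rather than as a keyword
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "confidential request, do not log"}],
    **{"no-log": True},
)
print(response)
```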

View file

@ -5,12 +5,9 @@ model_list:
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      api_version: "2023-07-01-preview"
  - model_name: azure-gpt-3.5
    litellm_params:
      model: gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY
    model_info:
      access_groups: ["public"]

litellm_settings:
  set_verbose: True
  success_callback: ["langfuse"]

router_settings:
  set_verbose: True
  debug_level: "DEBUG"

View file

@ -1675,9 +1675,9 @@ class ProxyConfig:
# these are litellm callbacks - "langfuse", "sentry", "wandb"
else:
litellm.success_callback.append(callback)
verbose_proxy_logger.debug(
print( # noqa
f"{blue_color_code} Initialized Success Callbacks - {litellm.success_callback} {reset_color_code}"
)
) # noqa
elif key == "failure_callback":
litellm.failure_callback = []

View file

@ -1279,6 +1279,15 @@ class Logging:
for callback in callbacks:
    try:
        litellm_params = self.model_call_details.get("litellm_params", {})
        if litellm_params.get("no-log", False) == True:
            # proxy cost tracking callbacks should still run
            if not (
                isinstance(callback, CustomLogger)
                and "_PROXY_" in callback.__class__.__name__
            ):
                print_verbose("no-log request, skipping logging")
                continue
        if callback == "lite_debugger":
            print_verbose("reaches lite_debugger for logging!")
            print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
@ -1707,7 +1716,20 @@ class Logging:
callbacks = litellm._async_success_callback
verbose_logger.debug(f"Async success callbacks: {callbacks}")
for callback in callbacks:
    # check if callback can run for this request
    litellm_params = self.model_call_details.get("litellm_params", {})
    if litellm_params.get("no-log", False) == True:
        # proxy cost tracking callbacks should still run
        if not (
            isinstance(callback, CustomLogger)
            and "_PROXY_" in callback.__class__.__name__
        ):
            print_verbose("no-log request, skipping logging")
            continue
    try:
        if kwargs.get("no-log", False) == True:
            print_verbose("no-log request, skipping logging")
            continue
        if callback == "cache" and litellm.cache is not None:
            # set_cache once complete streaming response is built
            print_verbose("async success_callback: reaches cache for logging!")
@ -2985,11 +3007,13 @@ def client(original_function):
print_verbose(
f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
)
# check if user does not want this to be logged
asyncio.create_task(
logging_obj.async_success_handler(result, start_time, end_time)
)
threading.Thread(
target=logging_obj.success_handler, args=(result, start_time, end_time)
target=logging_obj.success_handler,
args=(result, start_time, end_time),
).start()
# RETURN RESULT
@ -3892,6 +3916,7 @@ def get_litellm_params(
proxy_server_request=None,
acompletion=None,
preset_cache_key=None,
no_log=None,
):
litellm_params = {
"acompletion": acompletion,
@ -3908,6 +3933,7 @@ def get_litellm_params(
"model_info": model_info,
"proxy_server_request": proxy_server_request,
"preset_cache_key": preset_cache_key,
"no-log": no_log,
"stream_response": {}, # litellm_call_id: ModelResponse Dict
}