Merge pull request #2408 from BerriAI/litellm_no_store_reqs
[FEAT-liteLLM Proxy] Incognito Requests - Don't log anything
Commit 8036b48f14
6 changed files with 89 additions and 20 deletions
@@ -12,14 +12,16 @@ Features here are behind a commercial license in our `/enterprise` folder. [**Se

:::

Features:

- [ ] Content Moderation with LlamaGuard
- [ ] Content Moderation with Google Text Moderations
- [ ] Content Moderation with LLM Guard
- [ ] Reject calls from Blocked User list
- [ ] Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
- [ ] Tracking Spend for Custom Tags
- ✅ Content Moderation with LlamaGuard
- ✅ Content Moderation with Google Text Moderations
- ✅ Content Moderation with LLM Guard
- ✅ Reject calls from Blocked User list
- ✅ Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
- ✅ Don't log/store specific requests (eg confidential LLM requests)
- ✅ Tracking Spend for Custom Tags

## Content Moderation with LlamaGuard
## Content Moderation
### Content Moderation with LlamaGuard

Currently works with Sagemaker's LlamaGuard endpoint.
@@ -39,7 +41,7 @@ os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""
```

### Customize LlamaGuard prompt
#### Customize LlamaGuard prompt

To modify the unsafe categories llama guard evaluates against, just create your own version of [this category list](https://github.com/BerriAI/litellm/blob/main/litellm/proxy/llamaguard_prompt.txt)
@@ -51,7 +53,7 @@ callbacks: ["llamaguard_moderations"]
  llamaguard_unsafe_content_categories: /path/to/llamaguard_prompt.txt
```

## Content Moderation with LLM Guard
### Content Moderation with LLM Guard

Set the LLM Guard API Base in your environment
@@ -78,7 +80,7 @@ Expected results:
LLM Guard: Received response - {"sanitized_prompt": "hello world", "is_valid": true, "scanners": { "Regex": 0.0 }}
```

## Content Moderation with Google Text Moderation
### Content Moderation with Google Text Moderation

Requires your GOOGLE_APPLICATION_CREDENTIALS to be set in your .env (same as VertexAI).
@@ -89,7 +91,7 @@ litellm_settings:
  callbacks: ["google_text_moderation"]
```

### Set custom confidence thresholds
#### Set custom confidence thresholds

Google Moderations checks the text against several categories. [Source](https://cloud.google.com/natural-language/docs/moderating-text#safety_attribute_confidence_scores)
@@ -133,6 +135,33 @@ Here are the category specific values:
| "legal" | legal_threshold: 0.1 |


## Incognito Requests - Don't log anything

When `no-log=True`, the request will **not be logged on any callbacks** and there will be **no server logs on litellm**

```python
import openai

client = openai.OpenAI(
    api_key="anything",             # proxy api-key
    base_url="http://0.0.0.0:8000"  # litellm proxy
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages = [
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "no-log": True
    }
)

print(response)
```

## Enable Blocked User Lists
If any call is made to proxy with this user id, it'll be rejected - use this if you want to let users opt-out of ai features
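For clarity, here is a minimal sketch (not part of the diff) of what the `extra_body` example above actually sends: `"no-log"` travels as an ordinary top-level field in the JSON body to the proxy's `/chat/completions` route, so any HTTP client can set it. The URL and api key are the same placeholders used in the docs snippet.

```python
# Sketch only: the same "no-log" request sent with plain `requests`
# instead of the openai client. URL and key are placeholders from the
# docs example, not values defined by this PR.
import requests

response = requests.post(
    "http://0.0.0.0:8000/chat/completions",
    headers={"Authorization": "Bearer anything"},  # proxy api-key
    json={
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "user", "content": "this is a test request, write a short poem"}
        ],
        "no-log": True,  # ask the proxy not to log/store this request
    },
)

print(response.json())
```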
@@ -31,6 +31,18 @@ def _turn_on_debug():
    verbose_proxy_logger.setLevel(level=logging.DEBUG)  # set proxy logs to debug


def _disable_debugging():
    verbose_logger.disabled = True
    verbose_router_logger.disabled = True
    verbose_proxy_logger.disabled = True


def _enable_debugging():
    verbose_logger.disabled = False
    verbose_router_logger.disabled = False
    verbose_proxy_logger.disabled = False


def print_verbose(print_statement):
    try:
        if set_verbose:
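A quick usage sketch for the new helpers, assuming they live in litellm's `_logging` module (the changed file's path isn't shown in this excerpt); they simply flip the `disabled` flag on the three package loggers.

```python
# Illustrative sketch, module path assumed to be litellm._logging
from litellm._logging import _disable_debugging, _enable_debugging

_disable_debugging()   # litellm / router / proxy log output is suppressed
# ... handle confidential traffic ...
_enable_debugging()    # restore normal logging
```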
@@ -487,6 +487,8 @@ def completion(
    ### ASYNC CALLS ###
    acompletion = kwargs.get("acompletion", False)
    client = kwargs.get("client", None)
    ### Admin Controls ###
    no_log = kwargs.get("no-log", False)
    ######## end of unpacking kwargs ###########
    openai_params = [
        "functions",
@@ -563,6 +565,7 @@ def completion(
        "caching_groups",
        "ttl",
        "cache",
        "no-log",
    ]
    default_params = openai_params + litellm_params
    non_default_params = {
@@ -726,6 +729,7 @@ def completion(
            model_info=model_info,
            proxy_server_request=proxy_server_request,
            preset_cache_key=preset_cache_key,
            no_log=no_log,
        )
        logging.update_environment_variables(
            model=model,
@@ -2417,6 +2421,7 @@ def embedding(
        "caching_groups",
        "ttl",
        "cache",
        "no-log",
    ]
    default_params = openai_params + litellm_params
    non_default_params = {
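For reference, a hedged sketch of how the new kwarg could be passed when calling the SDK directly rather than through the proxy; this example is not taken from the PR. Because the key contains a dash it has to go through dict unpacking, and since `"no-log"` is listed among the litellm params it is stripped from the payload forwarded to the underlying provider.

```python
# Sketch only: direct SDK call with the new flag. Assumes OPENAI_API_KEY
# is set in the environment; completion() reads the flag with
# kwargs.get("no-log", False).
import litellm

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    **{"no-log": True},  # skip success/failure logging callbacks for this call
)
print(response)
```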
@@ -5,12 +5,9 @@ model_list:
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      api_version: "2023-07-01-preview"
  - model_name: azure-gpt-3.5
    litellm_params:
      model: gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY
    model_info:
      access_groups: ["public"]
litellm_settings:
  set_verbose: True
  success_callback: ["langfuse"]
router_settings:
  set_verbose: True
  debug_level: "DEBUG"
@@ -1675,9 +1675,9 @@ class ProxyConfig:
                        # these are litellm callbacks - "langfuse", "sentry", "wandb"
                        else:
                            litellm.success_callback.append(callback)
                    verbose_proxy_logger.debug(
                    print(  # noqa
                        f"{blue_color_code} Initialized Success Callbacks - {litellm.success_callback} {reset_color_code}"
                    )
                    )  # noqa
                elif key == "failure_callback":
                    litellm.failure_callback = []
@@ -1279,6 +1279,15 @@ class Logging:

        for callback in callbacks:
            try:
                litellm_params = self.model_call_details.get("litellm_params", {})
                if litellm_params.get("no-log", False) == True:
                    # proxy cost tracking callbacks should run
                    if not (
                        isinstance(callback, CustomLogger)
                        and "_PROXY_" in callback.__class__.__name__
                    ):
                        print_verbose("no-log request, skipping logging")
                        continue
                if callback == "lite_debugger":
                    print_verbose("reaches lite_debugger for logging!")
                    print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
@@ -1707,7 +1716,20 @@ class Logging:
        callbacks = litellm._async_success_callback
        verbose_logger.debug(f"Async success callbacks: {callbacks}")
        for callback in callbacks:
            # check if callback can run for this request
            litellm_params = self.model_call_details.get("litellm_params", {})
            if litellm_params.get("no-log", False) == True:
                # proxy cost tracking callbacks should run
                if not (
                    isinstance(callback, CustomLogger)
                    and "_PROXY_" in callback.__class__.__name__
                ):
                    print_verbose("no-log request, skipping logging")
                    continue
            try:
                if kwargs.get("no-log", False) == True:
                    print_verbose("no-log request, skipping logging")
                    continue
                if callback == "cache" and litellm.cache is not None:
                    # set_cache once complete streaming response is built
                    print_verbose("async success_callback: reaches cache for logging!")
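The same guard is added to both the sync and async success handlers. Below is a standalone sketch of the rule it encodes; the classes are illustrative stand-ins, with `CustomLogger` here only mirroring litellm's real base class.

```python
# Illustrative sketch of the callback filter above: for a "no-log" request,
# everything is skipped except proxy cost-tracking callbacks, i.e.
# CustomLogger subclasses whose class name contains "_PROXY_".
class CustomLogger:  # stand-in for litellm's CustomLogger base class
    pass

class _PROXY_CostTracker(CustomLogger):  # hypothetical proxy callback
    pass

def should_run_callback(callback, litellm_params: dict) -> bool:
    if litellm_params.get("no-log", False):
        return isinstance(callback, CustomLogger) and "_PROXY_" in type(callback).__name__
    return True

print(should_run_callback("langfuse", {"no-log": True}))            # False - skipped
print(should_run_callback(_PROXY_CostTracker(), {"no-log": True}))  # True - still runs
print(should_run_callback("langfuse", {}))                          # True - normal request
```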
@@ -2985,11 +3007,13 @@ def client(original_function):
            print_verbose(
                f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
            )
            # check if user does not want this to be logged
            asyncio.create_task(
                logging_obj.async_success_handler(result, start_time, end_time)
            )
            threading.Thread(
                target=logging_obj.success_handler, args=(result, start_time, end_time)
                target=logging_obj.success_handler,
                args=(result, start_time, end_time),
            ).start()

            # RETURN RESULT
@@ -3892,6 +3916,7 @@ def get_litellm_params(
    proxy_server_request=None,
    acompletion=None,
    preset_cache_key=None,
    no_log=None,
):
    litellm_params = {
        "acompletion": acompletion,
@@ -3908,6 +3933,7 @@ def get_litellm_params(
        "model_info": model_info,
        "proxy_server_request": proxy_server_request,
        "preset_cache_key": preset_cache_key,
        "no-log": no_log,
        "stream_response": {},  # litellm_call_id: ModelResponse Dict
    }
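Putting the pieces together, a simplified sketch (illustrative names only, not litellm's real signatures) of how the flag flows from the caller's kwargs through `get_litellm_params` into the logging checks shown earlier:

```python
# completion() pulls "no-log" out of kwargs, get_litellm_params() stores it
# under the "no-log" key, and the Logging handlers read it back before
# dispatching callbacks.
def get_litellm_params_sketch(no_log=None, **_):
    return {"no-log": no_log}

kwargs = {"no-log": True}  # what the caller / proxy passed
litellm_params = get_litellm_params_sketch(no_log=kwargs.get("no-log", False))
model_call_details = {"litellm_params": litellm_params}

# the check performed inside the success handlers
skip_non_proxy_callbacks = model_call_details["litellm_params"].get("no-log", False)
print(skip_non_proxy_callbacks)  # True
```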