Merge pull request #2408 from BerriAI/litellm_no_store_reqs

[FEAT-liteLLM Proxy] Incognito Requests - Don't log anything

Commit: 8036b48f14
6 changed files with 89 additions and 20 deletions
````diff
@@ -12,14 +12,16 @@ Features here are behind a commercial license in our `/enterprise` folder. [**Se
 :::
 
 Features:
-- [ ] Content Moderation with LlamaGuard
-- [ ] Content Moderation with Google Text Moderations
-- [ ] Content Moderation with LLM Guard
-- [ ] Reject calls from Blocked User list
-- [ ] Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
-- [ ] Tracking Spend for Custom Tags
+- ✅ Content Moderation with LlamaGuard
+- ✅ Content Moderation with Google Text Moderations
+- ✅ Content Moderation with LLM Guard
+- ✅ Reject calls from Blocked User list
+- ✅ Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
+- ✅ Don't log/store specific requests (eg confidential LLM requests)
+- ✅ Tracking Spend for Custom Tags
 
-## Content Moderation with LlamaGuard
+## Content Moderation
+### Content Moderation with LlamaGuard
 
 Currently works with Sagemaker's LlamaGuard endpoint.
 
````
````diff
@@ -39,7 +41,7 @@ os.environ["AWS_SECRET_ACCESS_KEY"] = ""
 os.environ["AWS_REGION_NAME"] = ""
 ```
 
-### Customize LlamaGuard prompt
+#### Customize LlamaGuard prompt
 
 To modify the unsafe categories llama guard evaluates against, just create your own version of [this category list](https://github.com/BerriAI/litellm/blob/main/litellm/proxy/llamaguard_prompt.txt)
 
````
````diff
@@ -51,7 +53,7 @@ callbacks: ["llamaguard_moderations"]
 llamaguard_unsafe_content_categories: /path/to/llamaguard_prompt.txt
 ```
 
-## Content Moderation with LLM Guard
+### Content Moderation with LLM Guard
 
 Set the LLM Guard API Base in your environment
 
````
````diff
@@ -78,7 +80,7 @@ Expected results:
 LLM Guard: Received response - {"sanitized_prompt": "hello world", "is_valid": true, "scanners": { "Regex": 0.0 }}
 ```
 
-## Content Moderation with Google Text Moderation
+### Content Moderation with Google Text Moderation
 
 Requires your GOOGLE_APPLICATION_CREDENTIALS to be set in your .env (same as VertexAI).
 
````
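The `LLM Guard: Received response` context line above shows the response shape LLM Guard returns. As a rough illustration of how a moderation hook could act on that shape (not litellm's actual handler; the reject rule here is an assumption), consider:

```python
# Illustration only: interpreting an LLM Guard response of the shape shown in
# the hunk above. Field names come from that expected output; the reject rule
# is an assumed example, not litellm's exact implementation.
llm_guard_response = {
    "sanitized_prompt": "hello world",
    "is_valid": True,
    "scanners": {"Regex": 0.0},
}

def check_prompt(response: dict) -> str:
    if not response.get("is_valid", False):
        # An invalid prompt would be rejected before it reaches the model.
        raise ValueError(f"Prompt rejected by LLM Guard: {response['scanners']}")
    return response["sanitized_prompt"]

print(check_prompt(llm_guard_response))  # -> "hello world"
```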
````diff
@@ -89,7 +91,7 @@ litellm_settings:
 callbacks: ["google_text_moderation"]
 ```
 
-### Set custom confidence thresholds
+#### Set custom confidence thresholds
 
 Google Moderations checks the test against several categories. [Source](https://cloud.google.com/natural-language/docs/moderating-text#safety_attribute_confidence_scores)
 
````
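The hunk above demotes the custom confidence thresholds heading; the next hunk keeps a table row (`legal_threshold: 0.1`) as context, showing a category-specific override. A generic sketch of how such thresholds are applied, assuming a request is flagged when any category's confidence exceeds its threshold (not litellm's actual `google_text_moderation` handler):

```python
# Generic threshold check: a request is flagged when any category's confidence
# exceeds its configured threshold. "legal": 0.1 mirrors the table row kept as
# context below; the default value and function name are illustrative assumptions.
DEFAULT_THRESHOLD = 0.8
category_thresholds = {"legal": 0.1}

def is_flagged(scores: dict) -> bool:
    for category, confidence in scores.items():
        threshold = category_thresholds.get(category, DEFAULT_THRESHOLD)
        if confidence > threshold:
            return True
    return False

print(is_flagged({"legal": 0.25, "toxic": 0.05}))  # True: 0.25 > 0.1 for "legal"
```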
````diff
@@ -133,6 +135,33 @@ Here are the category specific values:
 | "legal" | legal_threshold: 0.1 |
 
 
+## Incognito Requests - Don't log anything
+
+When `no-log=True`, the request will **not be logged on any callbacks** and there will be **no server logs on litellm**
+
+```python
+import openai
+client = openai.OpenAI(
+    api_key="anything",             # proxy api-key
+    base_url="http://0.0.0.0:8000"  # litellm proxy
+)
+
+response = client.chat.completions.create(
+    model="gpt-3.5-turbo",
+    messages = [
+        {
+            "role": "user",
+            "content": "this is a test request, write a short poem"
+        }
+    ],
+    extra_body={
+        "no-log": True
+    }
+)
+
+print(response)
+```
+
 ## Enable Blocked User Lists
 If any call is made to proxy with this user id, it'll be rejected - use this if you want to let users opt-out of ai features
 
````
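The docs example above goes through the proxy via `extra_body`. Since the `completion()` hunks later in this diff read the flag straight from kwargs (`kwargs.get("no-log", False)`), the same request can be sketched against the SDK directly; the model and API key setup here are placeholder assumptions:

```python
# Sketch of passing "no-log" to the SDK directly, based on the
# kwargs.get("no-log", False) line added to completion() in this PR.
# The hyphenated key can't be a normal keyword argument, so it is unpacked
# from a dict; OPENAI_API_KEY is a placeholder.
import os
import litellm

os.environ["OPENAI_API_KEY"] = "sk-..."  # placeholder

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
    **{"no-log": True},
)
print(response)
```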
````diff
@@ -31,6 +31,18 @@ def _turn_on_debug():
     verbose_proxy_logger.setLevel(level=logging.DEBUG)  # set proxy logs to debug
 
 
+def _disable_debugging():
+    verbose_logger.disabled = True
+    verbose_router_logger.disabled = True
+    verbose_proxy_logger.disabled = True
+
+
+def _enable_debugging():
+    verbose_logger.disabled = False
+    verbose_router_logger.disabled = False
+    verbose_proxy_logger.disabled = False
+
+
 def print_verbose(print_statement):
     try:
         if set_verbose:
````
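The two helpers added above flip the `disabled` attribute on litellm's module-level loggers, which is what makes "no server logs" possible for no-log requests. A standalone sketch of the same pattern with stdlib `logging` (the logger names are assumptions used only for illustration, not imports from litellm):

```python
import logging

# Stand-ins for litellm's module-level loggers; the names are assumed for
# this illustration and are not imported from litellm.
verbose_logger = logging.getLogger("litellm")
verbose_router_logger = logging.getLogger("litellm.router")
verbose_proxy_logger = logging.getLogger("litellm.proxy")

def _disable_debugging():
    # .disabled silences a logger without touching its handlers or level.
    verbose_logger.disabled = True
    verbose_router_logger.disabled = True
    verbose_proxy_logger.disabled = True

def _enable_debugging():
    verbose_logger.disabled = False
    verbose_router_logger.disabled = False
    verbose_proxy_logger.disabled = False

_disable_debugging()
verbose_proxy_logger.warning("suppressed")     # emits nothing
_enable_debugging()
verbose_proxy_logger.warning("visible again")  # printed via the default handler
```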
````diff
@@ -487,6 +487,8 @@ def completion(
     ### ASYNC CALLS ###
     acompletion = kwargs.get("acompletion", False)
     client = kwargs.get("client", None)
+    ### Admin Controls ###
+    no_log = kwargs.get("no-log", False)
     ######## end of unpacking kwargs ###########
     openai_params = [
         "functions",
````
````diff
@@ -563,6 +565,7 @@ def completion(
         "caching_groups",
         "ttl",
         "cache",
+        "no-log",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {
````
````diff
@@ -726,6 +729,7 @@ def completion(
             model_info=model_info,
             proxy_server_request=proxy_server_request,
             preset_cache_key=preset_cache_key,
+            no_log=no_log,
         )
         logging.update_environment_variables(
             model=model,
````
````diff
@@ -2417,6 +2421,7 @@ def embedding(
         "caching_groups",
         "ttl",
         "cache",
+        "no-log",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {
````
````diff
@@ -5,12 +5,9 @@ model_list:
       api_base: os.environ/AZURE_API_BASE
       api_key: os.environ/AZURE_API_KEY
       api_version: "2023-07-01-preview"
-  - model_name: azure-gpt-3.5
-    litellm_params:
-      model: gpt-3.5-turbo
-      api_key: os.environ/OPENAI_API_KEY
-    model_info:
-      access_groups: ["public"]
+litellm_settings:
+  set_verbose: True
+  success_callback: ["langfuse"]
 router_settings:
   set_verbose: True
   debug_level: "DEBUG"
````
````diff
@@ -1675,9 +1675,9 @@ class ProxyConfig:
                             # these are litellm callbacks - "langfuse", "sentry", "wandb"
                             else:
                                 litellm.success_callback.append(callback)
-                        verbose_proxy_logger.debug(
+                        print(  # noqa
                             f"{blue_color_code} Initialized Success Callbacks - {litellm.success_callback} {reset_color_code}"
-                        )
+                        )  # noqa
                     elif key == "failure_callback":
                         litellm.failure_callback = []
````
````diff
@@ -1279,6 +1279,15 @@ class Logging:
 
         for callback in callbacks:
             try:
+                litellm_params = self.model_call_details.get("litellm_params", {})
+                if litellm_params.get("no-log", False) == True:
+                    # proxy cost tracking cal backs should run
+                    if not (
+                        isinstance(callback, CustomLogger)
+                        and "_PROXY_" in callback.__class__.__name__
+                    ):
+                        print_verbose("no-log request, skipping logging")
+                        continue
                 if callback == "lite_debugger":
                     print_verbose("reaches lite_debugger for logging!")
                     print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
````
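The block added above is the core of the feature: on a no-log request every callback is skipped except litellm's internal proxy cost-tracking hooks, identified as `CustomLogger` subclasses with `_PROXY_` in the class name. A self-contained sketch of that rule with stand-in classes (not litellm's real loggers):

```python
# Stand-in classes to illustrate the skip rule added above; they mimic the
# isinstance + "_PROXY_" class-name check, nothing more.
class CustomLogger:
    def log_success(self, payload): ...

class _PROXY_CostTracker(CustomLogger):      # proxy-internal hook: must still run
    def log_success(self, payload):
        print("cost tracked")

class LangfuseLogger(CustomLogger):          # external logging: skipped on no-log
    def log_success(self, payload):
        print("sent to langfuse")

def run_callbacks(callbacks, litellm_params, payload):
    for callback in callbacks:
        if litellm_params.get("no-log", False) is True:
            is_proxy_internal = (
                isinstance(callback, CustomLogger)
                and "_PROXY_" in callback.__class__.__name__
            )
            if not is_proxy_internal:
                continue  # no-log request: skip external logging
        callback.log_success(payload)

run_callbacks(
    [_PROXY_CostTracker(), LangfuseLogger()],
    litellm_params={"no-log": True},
    payload={"model": "gpt-3.5-turbo"},
)  # prints "cost tracked" only
```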
````diff
@@ -1707,7 +1716,20 @@ class Logging:
         callbacks = litellm._async_success_callback
         verbose_logger.debug(f"Async success callbacks: {callbacks}")
         for callback in callbacks:
+            # check if callback can run for this request
+            litellm_params = self.model_call_details.get("litellm_params", {})
+            if litellm_params.get("no-log", False) == True:
+                # proxy cost tracking cal backs should run
+                if not (
+                    isinstance(callback, CustomLogger)
+                    and "_PROXY_" in callback.__class__.__name__
+                ):
+                    print_verbose("no-log request, skipping logging")
+                    continue
             try:
+                if kwargs.get("no-log", False) == True:
+                    print_verbose("no-log request, skipping logging")
+                    continue
                 if callback == "cache" and litellm.cache is not None:
                     # set_cache once complete streaming response is built
                     print_verbose("async success_callback: reaches cache for logging!")
````
````diff
@@ -2985,11 +3007,13 @@ def client(original_function):
             print_verbose(
                 f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
             )
+            # check if user does not want this to be logged
             asyncio.create_task(
                 logging_obj.async_success_handler(result, start_time, end_time)
             )
             threading.Thread(
-                target=logging_obj.success_handler, args=(result, start_time, end_time)
+                target=logging_obj.success_handler,
+                args=(result, start_time, end_time),
             ).start()
 
             # RETURN RESULT
````
````diff
@@ -3892,6 +3916,7 @@ def get_litellm_params(
     proxy_server_request=None,
     acompletion=None,
     preset_cache_key=None,
+    no_log=None,
 ):
     litellm_params = {
         "acompletion": acompletion,
````
````diff
@@ -3908,6 +3933,7 @@ def get_litellm_params(
         "model_info": model_info,
         "proxy_server_request": proxy_server_request,
         "preset_cache_key": preset_cache_key,
+        "no-log": no_log,
         "stream_response": {},  # litellm_call_id: ModelResponse Dict
     }
 
````
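Taken together with the `completion()` hunks earlier, the last two hunks close the loop: the hyphenated request key becomes the `no_log` argument and is stored back under `"no-log"`, where the `Logging` handlers above look it up. A simplified sketch of that plumbing (the real `get_litellm_params` carries many more parameters and fields than shown):

```python
# Simplified plumbing sketch; the real get_litellm_params has many more
# parameters and fields than shown here.
def get_litellm_params(acompletion=None, no_log=None):
    return {
        "acompletion": acompletion,
        "no-log": no_log,
    }

kwargs = {"no-log": True}                    # e.g. forwarded from extra_body by the proxy
no_log = kwargs.get("no-log", False)         # unpacked in completion()
litellm_params = get_litellm_params(acompletion=False, no_log=no_log)
assert litellm_params.get("no-log", False) is True   # the check run by the Logging callbacks
```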