forked from phoenix/litellm-mirror
fix(llm_guard.py): add streaming hook for moderation calls
This commit is contained in:
parent
0a5b8f0e4e
commit
49847347d0
4 changed files with 36 additions and 25 deletions
|
@ -101,19 +101,16 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
|
||||||
- Use the sanitized prompt returned
|
- Use the sanitized prompt returned
|
||||||
- LLM Guard can handle things like PII Masking, etc.
|
- LLM Guard can handle things like PII Masking, etc.
|
||||||
"""
|
"""
|
||||||
if "messages" in data:
|
|
||||||
safety_check_messages = data["messages"][
|
|
||||||
-1
|
|
||||||
] # get the last response - llama guard has a 4k token limit
|
|
||||||
if (
|
|
||||||
isinstance(safety_check_messages, dict)
|
|
||||||
and "content" in safety_check_messages
|
|
||||||
and isinstance(safety_check_messages["content"], str)
|
|
||||||
):
|
|
||||||
await self.moderation_check(safety_check_messages["content"])
|
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
async def async_post_call_streaming_hook(
|
||||||
|
self, user_api_key_dict: UserAPIKeyAuth, response: str
|
||||||
|
):
|
||||||
|
if response is not None:
|
||||||
|
await self.moderation_check(text=response)
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
# llm_guard = _ENTERPRISE_LLMGuard()
|
# llm_guard = _ENTERPRISE_LLMGuard()
|
||||||
|
|
||||||
|
|
|
@ -75,6 +75,13 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
|
||||||
async def async_moderation_hook(self, data: dict):
|
async def async_moderation_hook(self, data: dict):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
async def async_post_call_streaming_hook(
|
||||||
|
self,
|
||||||
|
user_api_key_dict: UserAPIKeyAuth,
|
||||||
|
response: str,
|
||||||
|
):
|
||||||
|
pass
|
||||||
|
|
||||||
#### SINGLE-USE #### - https://docs.litellm.ai/docs/observability/custom_callback#using-your-custom-callback-function
|
#### SINGLE-USE #### - https://docs.litellm.ai/docs/observability/custom_callback#using-your-custom-callback-function
|
||||||
|
|
||||||
def log_input_event(self, model, messages, kwargs, print_verbose, callback_func):
|
def log_input_event(self, model, messages, kwargs, print_verbose, callback_func):
|
||||||
|
|
|
@ -401,6 +401,27 @@ class ProxyLogging:
|
||||||
raise e
|
raise e
|
||||||
return new_response
|
return new_response
|
||||||
|
|
||||||
|
async def post_call_streaming_hook(
|
||||||
|
self,
|
||||||
|
response: str,
|
||||||
|
user_api_key_dict: UserAPIKeyAuth,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
- Check outgoing streaming response up until that point
|
||||||
|
- Run through moderation check
|
||||||
|
- Reject request if it fails moderation check
|
||||||
|
"""
|
||||||
|
new_response = copy.deepcopy(response)
|
||||||
|
for callback in litellm.callbacks:
|
||||||
|
try:
|
||||||
|
if isinstance(callback, CustomLogger):
|
||||||
|
await callback.async_post_call_streaming_hook(
|
||||||
|
user_api_key_dict=user_api_key_dict, response=new_response
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
raise e
|
||||||
|
return new_response
|
||||||
|
|
||||||
|
|
||||||
### DB CONNECTOR ###
|
### DB CONNECTOR ###
|
||||||
# Define the retry decorator with backoff strategy
|
# Define the retry decorator with backoff strategy
|
||||||
|
|
|
@ -909,20 +909,6 @@ class Logging:
|
||||||
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
|
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if litellm.max_budget and self.stream:
|
|
||||||
start_time = self.start_time
|
|
||||||
end_time = (
|
|
||||||
self.start_time
|
|
||||||
) # no time has passed as the call hasn't been made yet
|
|
||||||
time_diff = (end_time - start_time).total_seconds()
|
|
||||||
float_diff = float(time_diff)
|
|
||||||
litellm._current_cost += litellm.completion_cost(
|
|
||||||
model=self.model,
|
|
||||||
prompt="".join(message["content"] for message in self.messages),
|
|
||||||
completion="",
|
|
||||||
total_time=float_diff,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
|
# Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
|
||||||
callbacks = litellm.input_callback + self.dynamic_input_callbacks
|
callbacks = litellm.input_callback + self.dynamic_input_callbacks
|
||||||
for callback in callbacks:
|
for callback in callbacks:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue