From 7348956bf276dbbe04c2427d4ae2432bd77723ad Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 19 Feb 2024 14:17:52 -0800 Subject: [PATCH] docs(enterprise.md): add google text moderations to the docs --- docs/my-website/docs/proxy/enterprise.md | 55 +++++++++++++++++++ .../google_text_moderation.py | 19 ++++--- 2 files changed, 65 insertions(+), 9 deletions(-) diff --git a/docs/my-website/docs/proxy/enterprise.md b/docs/my-website/docs/proxy/enterprise.md index f881215e6..ad0691f48 100644 --- a/docs/my-website/docs/proxy/enterprise.md +++ b/docs/my-website/docs/proxy/enterprise.md @@ -13,6 +13,7 @@ Features here are behind a commercial license in our `/enterprise` folder. [**Se Features: - [ ] Content Moderation with LlamaGuard +- [ ] Content Moderation with Google Text Moderation - [ ] Tracking Spend for Custom Tags ## Content Moderation with LlamaGuard @@ -35,6 +36,60 @@ os.environ["AWS_SECRET_ACCESS_KEY"] = "" os.environ["AWS_REGION_NAME"] = "" ``` +## Content Moderation with Google Text Moderation + +Requires your GOOGLE_APPLICATION_CREDENTIALS to be set in your .env (same as VertexAI). + +How to enable this in your config.yaml: + +```yaml +litellm_settings: + callbacks: ["google_text_moderation"] +``` + +### Set custom confidence thresholds + +Google Moderations checks the text against several categories. [Source](https://cloud.google.com/natural-language/docs/moderating-text#safety_attribute_confidence_scores) + +#### Set global default confidence threshold + +By default this is set to 0.8, but you can override this in your config.yaml. + +```yaml +litellm_settings: + google_moderation_confidence_threshold: 0.4 +``` + +#### Set category-specific confidence threshold + +Set a category-specific confidence threshold in your config.yaml. If none is set, the global default will be used. 
+ +```yaml +litellm_settings: + toxic_confidence_threshold: 0.1 +``` + +Here are the category-specific values: + +| Category | Setting | +| -------- | -------- | +| "toxic" | toxic_confidence_threshold: 0.1 | +| "insult" | insult_confidence_threshold: 0.1 | +| "profanity" | profanity_confidence_threshold: 0.1 | +| "derogatory" | derogatory_confidence_threshold: 0.1 | +| "sexual" | sexual_confidence_threshold: 0.1 | +| "death_harm_and_tragedy" | death_harm_and_tragedy_confidence_threshold: 0.1 | +| "violent" | violent_confidence_threshold: 0.1 | +| "firearms_and_weapons" | firearms_and_weapons_confidence_threshold: 0.1 | +| "public_safety" | public_safety_confidence_threshold: 0.1 | +| "health" | health_confidence_threshold: 0.1 | +| "religion_and_belief" | religion_and_belief_confidence_threshold: 0.1 | +| "illicit_drugs" | illicit_drugs_confidence_threshold: 0.1 | +| "war_and_conflict" | war_and_conflict_confidence_threshold: 0.1 | +| "politics" | politics_confidence_threshold: 0.1 | +| "finance" | finance_confidence_threshold: 0.1 | +| "legal" | legal_confidence_threshold: 0.1 | + ## Tracking Spend for Custom Tags Requirements: diff --git a/enterprise/enterprise_hooks/google_text_moderation.py b/enterprise/enterprise_hooks/google_text_moderation.py index a6a48a385..dad8bac45 100644 --- a/enterprise/enterprise_hooks/google_text_moderation.py +++ b/enterprise/enterprise_hooks/google_text_moderation.py @@ -60,14 +60,9 @@ class _ENTERPRISE_GoogleTextModeration(CustomLogger): self.language_document = language_v1.types.Document self.document_type = language_v1.types.Document.Type.PLAIN_TEXT - if hasattr(litellm, "google_moderation_confidence_threshold"): - default_confidence_threshold = ( - litellm.google_moderation_confidence_threshold - ) - else: - default_confidence_threshold = ( - 0.8 # by default require a high confidence (80%) to fail - ) + default_confidence_threshold = ( + litellm.google_moderation_confidence_threshold or 0.8 + ) # by default require a high confidence (80%) to fail for category in self.confidence_categories: if hasattr(litellm, 
f"{category}_confidence_threshold"): @@ -82,6 +77,13 @@ class _ENTERPRISE_GoogleTextModeration(CustomLogger): f"{category}_confidence_threshold", default_confidence_threshold, ) + set_confidence_value = getattr( + self, + f"{category}_confidence_threshold", + ) + verbose_proxy_logger.info( + f"Google Text Moderation: {category}_confidence_threshold: {set_confidence_value}" + ) def print_verbose(self, print_statement): try: @@ -112,7 +114,6 @@ class _ENTERPRISE_GoogleTextModeration(CustomLogger): # Make the request response = self.client.moderate_text(request=request) - print(response) for category in response.moderation_categories: category_name = category.name category_name = category_name.lower()