diff --git a/litellm/proxy/post_call_rules.py b/litellm/proxy/post_call_rules.py
new file mode 100644
index 000000000..23ec93f5b
--- /dev/null
+++ b/litellm/proxy/post_call_rules.py
@@ -0,0 +1,8 @@
+def post_response_rule(input):  # receives the model response
+    print(f"post_response_rule:input={input}")  # noqa
+    if len(input) < 200:
+        return {
+            "decision": False,
+            "message": "This violates LiteLLM Proxy Rules. Response too short",
+        }
+    return {"decision": True}  # message not required since the request will pass
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 81374c8ee..2699e9d9c 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -14,6 +14,9 @@ model_list:
   - model_name: BEDROCK_GROUP
     litellm_params:
       model: bedrock/cohere.command-text-v14
+  - model_name: sagemaker
+    litellm_params:
+      model: sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4
   - model_name: openai-gpt-3.5
     litellm_params:
       model: gpt-3.5-turbo
@@ -42,6 +45,7 @@ model_list:
     model_info:
       mode: embedding
 litellm_settings:
+  post_call_rules: post_call_rules.post_response_rule
   fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
   # cache: True
   # setting callback class
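
For context, a minimal sketch (not part of the diff) of how the new rule behaves when called directly; the sample response strings are hypothetical.

# Minimal sketch: exercising post_response_rule directly, outside the proxy.
# The sample response strings below are hypothetical.
from litellm.proxy.post_call_rules import post_response_rule

short_response = "Hi there."   # under 200 characters -> rule blocks it
long_response = "x" * 250      # 200+ characters -> rule lets it through

print(post_response_rule(short_response))
# {'decision': False, 'message': 'This violates LiteLLM Proxy Rules. Response too short'}

print(post_response_rule(long_response))
# {'decision': True}

With the config change above, the proxy is expected to import this module via post_call_rules: post_call_rules.post_response_rule and run the check on each model response; a response the rule rejects should surface to the client as a proxy error carrying the returned message.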