mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 02:34:29 +00:00
* feat(litellm_pre_call_utils.py): support forwarding request headers to backend llm api * fix(litellm_pre_call_utils.py): handle custom litellm key header * test(router_code_coverage.py): check if all router functions are dire… (#6186) * test(router_code_coverage.py): check if all router functions are directly tested prevent regressions * docs(configs.md): document all environment variables (#6185) * docs: make it easier to find anthropic/openai prompt caching doc * aded codecov yml (#6207) * fix codecov.yaml * run ci/cd again * (refactor) caching use LLMCachingHandler for async_get_cache and set_cache (#6208) * use folder for caching * fix importing caching * fix clickhouse pyright * fix linting * fix correctly pass kwargs and args * fix test case for embedding * fix linting * fix embedding caching logic * fix refactor handle utils.py * fix test_embedding_caching_azure_individual_items_reordered * (feat) prometheus have well defined latency buckets (#6211) * fix prometheus have well defined latency buckets * use a well define latency bucket * use types file for prometheus logging * add test for LATENCY_BUCKETS * fix prom testing * fix config.yml * (refactor caching) use LLMCachingHandler for caching streaming responses (#6210) * use folder for caching * fix importing caching * fix clickhouse pyright * fix linting * fix correctly pass kwargs and args * fix test case for embedding * fix linting * fix embedding caching logic * fix refactor handle utils.py * refactor async set stream cache * fix linting * bump (#6187) * update code cov yaml * fix config.yml * add caching component to code cov * fix config.yml ci/cd * add coverage for proxy auth * (refactor caching) use common `_retrieve_from_cache` helper (#6212) * use folder for caching * fix importing caching * fix clickhouse pyright * fix linting * fix correctly pass kwargs and args * fix test case for embedding * fix linting * fix embedding caching logic * fix refactor handle utils.py * refactor async set stream 
cache * fix linting * refactor - use _retrieve_from_cache * refactor use _convert_cached_result_to_model_response * fix linting errors * bump: version 1.49.2 → 1.49.3 * fix code cov components * test(test_router_helpers.py): add router component unit tests * test: add additional router tests * test: add more router testing * test: add more router testing + more mock functions * ci(router_code_coverage.py): fix check --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: yujonglee <yujonglee.dev@gmail.com> * bump: version 1.49.3 → 1.49.4 * (refactor) use helper function `_assemble_complete_response_from_streaming_chunks` to assemble complete responses in caching and logging callbacks (#6220) * (refactor) use _assemble_complete_response_from_streaming_chunks * add unit test for test_assemble_complete_response_from_streaming_chunks_1 * fix assemble complete_streaming_response * config add logging_testing * add logging_coverage in codecov * test test_assemble_complete_response_from_streaming_chunks_3 * add unit tests for _assemble_complete_response_from_streaming_chunks * fix remove unused / junk function * add test for streaming_chunks when error assembling * (refactor) OTEL - use safe_set_attribute for setting attributes (#6226) * otel - use safe_set_attribute for setting attributes * fix OTEL only use safe_set_attribute * (fix) prompt caching cost calculation OpenAI, Azure OpenAI (#6231) * fix prompt caching cost calculation * fix testing for prompt cache cost calc * fix(allowed_model_region): allow us as allowed region (#6234) * test(router_code_coverage.py): check if all router functions are dire… (#6186) * test(router_code_coverage.py): check if all router functions are directly tested prevent regressions * docs(configs.md): document all environment variables (#6185) * docs: make it easier to find anthropic/openai prompt caching doc * aded codecov yml (#6207) * fix codecov.yaml * run ci/cd again * (refactor) caching use 
LLMCachingHandler for async_get_cache and set_cache (#6208) * use folder for caching * fix importing caching * fix clickhouse pyright * fix linting * fix correctly pass kwargs and args * fix test case for embedding * fix linting * fix embedding caching logic * fix refactor handle utils.py * fix test_embedding_caching_azure_individual_items_reordered * (feat) prometheus have well defined latency buckets (#6211) * fix prometheus have well defined latency buckets * use a well define latency bucket * use types file for prometheus logging * add test for LATENCY_BUCKETS * fix prom testing * fix config.yml * (refactor caching) use LLMCachingHandler for caching streaming responses (#6210) * use folder for caching * fix importing caching * fix clickhouse pyright * fix linting * fix correctly pass kwargs and args * fix test case for embedding * fix linting * fix embedding caching logic * fix refactor handle utils.py * refactor async set stream cache * fix linting * bump (#6187) * update code cov yaml * fix config.yml * add caching component to code cov * fix config.yml ci/cd * add coverage for proxy auth * (refactor caching) use common `_retrieve_from_cache` helper (#6212) * use folder for caching * fix importing caching * fix clickhouse pyright * fix linting * fix correctly pass kwargs and args * fix test case for embedding * fix linting * fix embedding caching logic * fix refactor handle utils.py * refactor async set stream cache * fix linting * refactor - use _retrieve_from_cache * refactor use _convert_cached_result_to_model_response * fix linting errors * bump: version 1.49.2 → 1.49.3 * fix code cov components * test(test_router_helpers.py): add router component unit tests * test: add additional router tests * test: add more router testing * test: add more router testing + more mock functions * ci(router_code_coverage.py): fix check --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: yujonglee <yujonglee.dev@gmail.com> * bump: version 
1.49.3 → 1.49.4 * (refactor) use helper function `_assemble_complete_response_from_streaming_chunks` to assemble complete responses in caching and logging callbacks (#6220) * (refactor) use _assemble_complete_response_from_streaming_chunks * add unit test for test_assemble_complete_response_from_streaming_chunks_1 * fix assemble complete_streaming_response * config add logging_testing * add logging_coverage in codecov * test test_assemble_complete_response_from_streaming_chunks_3 * add unit tests for _assemble_complete_response_from_streaming_chunks * fix remove unused / junk function * add test for streaming_chunks when error assembling * (refactor) OTEL - use safe_set_attribute for setting attributes (#6226) * otel - use safe_set_attribute for setting attributes * fix OTEL only use safe_set_attribute * fix(allowed_model_region): allow us as allowed region --------- Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com> Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: yujonglee <yujonglee.dev@gmail.com> * fix(litellm_pre_call_utils.py): support 'us' region routing + fix header forwarding to filter on `x-` headers * docs(customer_routing.md): fix region-based routing example * feat(azure.py): handle empty arguments function call - azure Closes https://github.com/BerriAI/litellm/issues/6241 * feat(guardrails_ai.py): support guardrails ai integration Adds support for on-prem guardrails via guardrails ai * fix(proxy/utils.py): prevent sql injection attack Fixes https://huntr.com/bounties/a4f6d357-5b44-4e00-9cac-f1cc351211d2 * fix: fix linting errors * fix(litellm_pre_call_utils.py): don't log litellm api key in proxy server request headers * fix(litellm_pre_call_utils.py): don't forward stainless headers * docs(guardrails_ai.md): add guardrails ai quick start to docs * test: handle flaky test --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: yujonglee <yujonglee.dev@gmail.com> Co-authored-by: Marcus Elwin 
<marcus@elwin.com>
109 lines
3.7 KiB
Python
109 lines
3.7 KiB
Python
# +-------------------------------------------------------------+
|
|
#
|
|
# Use GuardrailsAI for your LLM calls
|
|
#
|
|
# +-------------------------------------------------------------+
|
|
# Thank you for using Litellm! - Krrish & Ishaan
|
|
|
|
import json
|
|
from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
|
|
|
|
from fastapi import HTTPException
|
|
|
|
import litellm
|
|
from litellm._logging import verbose_proxy_logger
|
|
from litellm.caching.caching import DualCache
|
|
from litellm.integrations.custom_guardrail import CustomGuardrail
|
|
from litellm.llms.prompt_templates.common_utils import (
|
|
convert_openai_message_to_only_content_messages,
|
|
get_content_from_model_response,
|
|
)
|
|
from litellm.proxy._types import UserAPIKeyAuth
|
|
from litellm.proxy.common_utils.callback_utils import (
|
|
add_guardrail_to_applied_guardrails_header,
|
|
)
|
|
from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
|
|
from litellm.types.guardrails import GuardrailEventHooks
|
|
from litellm.types.llms.openai import AllMessageValues
|
|
|
|
|
|
class GuardrailsAIResponse(TypedDict):
    """Typed view of the JSON body returned by a Guardrails AI validate call.

    The keys are camelCase because the decoded JSON response is unpacked
    directly into this type.
    """

    callId: str
    rawLlmOutput: str
    validatedOutput: str
    # An explicit ``False`` here is what causes a response to be rejected.
    validationPassed: bool
|
|
|
|
|
|
class GuardrailsAI(CustomGuardrail):
    """Post-call guardrail that validates LLM output with a Guardrails AI server.

    The text content of a model response is POSTed to
    ``guards/<guard_name>/validate`` under the configured API base. When the
    server answers with ``validationPassed`` set to ``False`` the request is
    rejected with an HTTP 400.
    """

    def __init__(
        self,
        guard_name: str,
        api_base: Optional[str] = None,
        **kwargs,
    ):
        """Configure the guardrail.

        Args:
            guard_name: Name of the guard to call on the Guardrails AI server.
            api_base: Base URL of the Guardrails AI server. Falls back to
                ``http://0.0.0.0:8000`` when not provided (or empty).
            **kwargs: Stored on ``self.optional_params`` and forwarded
                unchanged to ``CustomGuardrail.__init__``.

        Raises:
            Exception: If ``guard_name`` is ``None``.
        """
        if guard_name is None:
            raise Exception(
                "GuardrailsAIException - Please pass the Guardrails AI guard name via 'litellm_params::guard_name'"
            )
        self.guardrails_ai_api_base = api_base or "http://0.0.0.0:8000"
        self.guardrails_ai_guard_name = guard_name
        # store kwargs as optional_params
        self.optional_params = kwargs
        # This guardrail only registers for the post-call event.
        supported_event_hooks = [GuardrailEventHooks.post_call]
        super().__init__(supported_event_hooks=supported_event_hooks, **kwargs)

    async def make_guardrails_ai_api_request(self, llm_output: str):
        """Send ``llm_output`` to the guard's validate endpoint.

        Args:
            llm_output: Text to validate; sent as the ``llmOutput`` JSON field.

        Returns:
            The decoded JSON response as a ``GuardrailsAIResponse``.

        Raises:
            HTTPException: With status 400 when the response's
                ``validationPassed`` is exactly ``False``. The parsed response
                is included in the error detail.
        """
        from httpx import URL

        # URL.join() resolves its argument like a relative link: without a
        # trailing slash the last path segment of the base is replaced, so
        # "http://host/v1" would lose "/v1". Appending the slash keeps any
        # path prefix; root-level bases resolve exactly as before.
        base_url = self.guardrails_ai_api_base
        if not base_url.endswith("/"):
            base_url += "/"
        validate_url = str(
            URL(base_url).join(f"guards/{self.guardrails_ai_guard_name}/validate")
        )

        response = await litellm.module_level_aclient.post(
            url=validate_url,
            data=json.dumps({"llmOutput": llm_output}),
            headers={
                "Content-Type": "application/json",
            },
        )
        verbose_proxy_logger.debug("guardrails_ai response: %s", response)
        _json_response = GuardrailsAIResponse(**response.json())  # type: ignore
        # Only an explicit False rejects; a missing key does not.
        if _json_response.get("validationPassed") is False:
            raise HTTPException(
                status_code=400,
                detail={
                    "error": "Violated guardrail policy",
                    "guardrails_ai_response": _json_response,
                },
            )
        return _json_response

    async def async_post_call_success_hook(
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        response,
    ):
        """
        Runs on response from LLM API call

        It can be used to reject a response

        Args:
            data: Request data; passed to ``should_run_guardrail`` and to
                ``add_guardrail_to_applied_guardrails_header``.
            user_api_key_dict: Not used by this hook.
            response: The LLM response. Anything that is not a
                ``litellm.ModelResponse`` is ignored.

        Returns:
            Always ``None``. A failed validation surfaces as the
            ``HTTPException`` raised by ``make_guardrails_ai_api_request``.
        """
        event_type: GuardrailEventHooks = GuardrailEventHooks.post_call
        if self.should_run_guardrail(data=data, event_type=event_type) is not True:
            return

        if not isinstance(response, litellm.ModelResponse):
            return

        response_str: str = get_content_from_model_response(response)
        if response_str:
            await self.make_guardrails_ai_api_request(llm_output=response_str)

            # NOTE(review): nesting reconstructed from a whitespace-stripped
            # source - confirm this call belongs inside the non-empty branch.
            add_guardrail_to_applied_guardrails_header(
                request_data=data, guardrail_name=self.guardrail_name
            )

        return
|