Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
Litellm dev 02 07 2025 p2 (#8377)
* fix(caching_routes.py): mask redis password on `/cache/ping` route
* fix(caching_routes.py): fix linting error
* fix(caching_routes.py): fix linting error on caching routes
* fix: fix test - ignore mask_dict - has a breakpoint
* fix(azure.py): add timeout param + elapsed time in azure timeout error
* fix(http_handler.py): add elapsed time to http timeout request - makes it easier to debug how long the request took before failing
This commit is contained in: parent ff2529a994, commit 7363b072c1
7 changed files with 126 additions and 25 deletions
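The main addition is a small `SensitiveDataMasker` utility that the `/cache/ping` route now runs over its cache params before returning them. The sketch below is not part of the commit; the dict and its values are made up for illustration, but it uses the class exactly as defined in the new file further down.

```python
# Illustrative sketch only -- exercises the SensitiveDataMasker added by this
# commit (litellm/litellm_core_utils/sensitive_data_masker.py). The dict below
# is a made-up stand-in for cache params.
from litellm.litellm_core_utils.sensitive_data_masker import SensitiveDataMasker

masker = SensitiveDataMasker()

cache_params = {
    "type": "redis",
    "redis_kwargs": {                  # nested dicts are recursed into
        "host": "my-redis.internal",
        "port": 6379,
        "password": "hunter2hunter2",  # key matches the "password" pattern
    },
    "ttl": 120,
}

print(masker.mask_dict(cache_params))
# {'type': 'redis',
#  'redis_kwargs': {'host': 'my-redis.internal', 'port': 6379,
#                   'password': 'hunt******ter2'},
#  'ttl': 120}
```

Non-sensitive primitives pass through untouched; only keys matching one of the sensitive patterns get the prefix/suffix mask.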
litellm/litellm_core_utils/sensitive_data_masker.py (new file, 81 additions)

@@ -0,0 +1,81 @@
+from typing import Any, Dict, Optional, Set
+
+
+class SensitiveDataMasker:
+    def __init__(
+        self,
+        sensitive_patterns: Optional[Set[str]] = None,
+        visible_prefix: int = 4,
+        visible_suffix: int = 4,
+        mask_char: str = "*",
+    ):
+        self.sensitive_patterns = sensitive_patterns or {
+            "password",
+            "secret",
+            "key",
+            "token",
+            "auth",
+            "credential",
+            "access",
+            "private",
+            "certificate",
+        }
+
+        self.visible_prefix = visible_prefix
+        self.visible_suffix = visible_suffix
+        self.mask_char = mask_char
+
+    def _mask_value(self, value: str) -> str:
+        if not value or len(str(value)) < (self.visible_prefix + self.visible_suffix):
+            return value
+
+        value_str = str(value)
+        masked_length = len(value_str) - (self.visible_prefix + self.visible_suffix)
+        return f"{value_str[:self.visible_prefix]}{self.mask_char * masked_length}{value_str[-self.visible_suffix:]}"
+
+    def is_sensitive_key(self, key: str) -> bool:
+        key_lower = str(key).lower()
+        result = any(pattern in key_lower for pattern in self.sensitive_patterns)
+        return result
+
+    def mask_dict(
+        self, data: Dict[str, Any], depth: int = 0, max_depth: int = 10
+    ) -> Dict[str, Any]:
+        if depth >= max_depth:
+            return data
+
+        masked_data: Dict[str, Any] = {}
+        for k, v in data.items():
+            try:
+                if isinstance(v, dict):
+                    masked_data[k] = self.mask_dict(v, depth + 1)
+                elif hasattr(v, "__dict__") and not isinstance(v, type):
+                    masked_data[k] = self.mask_dict(vars(v), depth + 1)
+                elif self.is_sensitive_key(k):
+                    str_value = str(v) if v is not None else ""
+                    masked_data[k] = self._mask_value(str_value)
+                else:
+                    masked_data[k] = (
+                        v if isinstance(v, (int, float, bool, str)) else str(v)
+                    )
+            except Exception:
+                masked_data[k] = "<unable to serialize>"
+
+        return masked_data
+
+
+# Usage example:
+"""
+masker = SensitiveDataMasker()
+data = {
+    "api_key": "sk-1234567890abcdef",
+    "redis_password": "very_secret_pass",
+    "port": 6379
+}
+masked = masker.mask_dict(data)
+# Result: {
+#    "api_key": "sk-1****cdef",
+#    "redis_password": "very****pass",
+#    "port": 6379
+# }
+"""
azure.py

@@ -5,7 +5,7 @@ import time
 from typing import Any, Callable, Dict, List, Literal, Optional, Union

 import httpx  # type: ignore
-from openai import AsyncAzureOpenAI, AzureOpenAI
+from openai import APITimeoutError, AsyncAzureOpenAI, AzureOpenAI

 import litellm
 from litellm.caching.caching import DualCache
@@ -305,6 +305,7 @@ class AzureChatCompletion(BaseLLM):
         - call chat.completions.create.with_raw_response when litellm.return_response_headers is True
         - call chat.completions.create by default
         """
+        start_time = time.time()
         try:
             raw_response = await azure_client.chat.completions.with_raw_response.create(
                 **data, timeout=timeout
@@ -313,6 +314,11 @@ class AzureChatCompletion(BaseLLM):
             headers = dict(raw_response.headers)
             response = raw_response.parse()
             return headers, response
+        except APITimeoutError as e:
+            end_time = time.time()
+            time_delta = round(end_time - start_time, 2)
+            e.message += f" - timeout value={timeout}, time taken={time_delta} seconds"
+            raise e
         except Exception as e:
             raise e

@@ -642,6 +648,7 @@ class AzureChatCompletion(BaseLLM):
                 )
                 raise AzureOpenAIError(status_code=500, message=str(e))
         except Exception as e:
+            message = getattr(e, "message", str(e))
            ## LOGGING
            logging_obj.post_call(
                input=data["messages"],
@@ -652,7 +659,7 @@ class AzureChatCompletion(BaseLLM):
            if hasattr(e, "status_code"):
                raise e
            else:
-                raise AzureOpenAIError(status_code=500, message=str(e))
+                raise AzureOpenAIError(status_code=500, message=message)

    def streaming(
        self,
@@ -797,10 +804,11 @@ class AzureChatCompletion(BaseLLM):
            status_code = getattr(e, "status_code", 500)
            error_headers = getattr(e, "headers", None)
            error_response = getattr(e, "response", None)
+            message = getattr(e, "message", str(e))
            if error_headers is None and error_response:
                error_headers = getattr(error_response, "headers", None)
            raise AzureOpenAIError(
-                status_code=status_code, message=str(e), headers=error_headers
+                status_code=status_code, message=message, headers=error_headers
            )

    async def aembedding(
http_handler.py

@@ -1,5 +1,6 @@
 import asyncio
 import os
+import time
 from typing import TYPE_CHECKING, Any, Callable, List, Mapping, Optional, Union

 import httpx
@@ -179,6 +180,7 @@ class AsyncHTTPHandler:
         stream: bool = False,
         logging_obj: Optional[LiteLLMLoggingObject] = None,
     ):
+        start_time = time.time()
        try:
            if timeout is None:
                timeout = self.timeout
@@ -207,6 +209,8 @@ class AsyncHTTPHandler:
            finally:
                await new_client.aclose()
        except httpx.TimeoutException as e:
+            end_time = time.time()
+            time_delta = round(end_time - start_time, 3)
            headers = {}
            error_response = getattr(e, "response", None)
            if error_response is not None:
@@ -214,7 +218,7 @@ class AsyncHTTPHandler:
                    headers["response_headers-{}".format(key)] = value

            raise litellm.Timeout(
-                message=f"Connection timed out after {timeout} seconds.",
+                message=f"Connection timed out. Timeout passed={timeout}, time taken={time_delta} seconds",
                model="default-model-name",
                llm_provider="litellm-httpx-handler",
                headers=headers,
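Both the Azure handler and the async HTTP handler above now follow the same pattern: record a start time, and on timeout report the configured timeout alongside the measured elapsed time. A minimal, standalone sketch of that pattern (not litellm code; the URL and timeout value are placeholders, and litellm's real handler raises `litellm.Timeout` rather than re-raising httpx's exception):

```python
# Minimal sketch of the elapsed-time-on-timeout pattern used in the hunks above.
import time

import httpx

timeout = 0.001  # deliberately tiny so the request times out

start_time = time.time()
try:
    httpx.get("https://example.com", timeout=timeout)
except httpx.TimeoutException as e:
    time_delta = round(time.time() - start_time, 3)
    # Surface both the configured timeout and the time actually spent waiting.
    raise httpx.TimeoutException(
        f"Connection timed out. Timeout passed={timeout}, "
        f"time taken={time_delta} seconds"
    ) from e
```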
File diff suppressed because one or more lines are too long
@@ -5,6 +5,12 @@ model_list:
   - model_name: gpt-4
     litellm_params:
       model: gpt-3.5-turbo
+  - model_name: azure-gpt-35-turbo
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
+      timeout: 0.000000001
   - model_name: o3-mini
     litellm_params:
       model: o3-mini
@@ -12,7 +18,7 @@ model_list:
   - model_name: anthropic-claude
     litellm_params:
       model: claude-3-5-haiku-20241022
-      mock_response: Hi!
+      timeout: 0.000000001
   - model_name: groq/*
     litellm_params:
       model: groq/*
@@ -28,16 +34,11 @@ model_list:
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/

-general_settings:
-  enable_jwt_auth: True
-  litellm_jwtauth:
-    team_id_jwt_field: "client_id"
-    team_id_upsert: true
-    scope_mappings:
-      - scope: litellm.api.consumer
-        models: ["anthropic-claude"]
-        routes: ["/v1/chat/completions"]
-      - scope: litellm.api.gpt_3_5_turbo
-        models: ["gpt-3.5-turbo-testing"]
-    enforce_scope_based_access: true
-    enforce_rbac: true
+litellm_settings:
+  cache: true
+
+router_settings:
+  redis_host: os.environ/REDIS_HOST
+  redis_password: os.environ/REDIS_PASSWORD
+  redis_port: os.environ/REDIS_PORT
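The two `timeout: 0.000000001` entries in the config above appear intended to force the timeout path so the new error text can be observed. A hedged sketch of exercising one of those models through `litellm.completion` (the model name, key, and base below are placeholders; the exact wording of the resulting message is shown in the azure.py and http_handler.py hunks earlier):

```python
# Illustrative sketch only: forcing the timeout path with a near-zero timeout,
# as the config above appears to do. Credentials and endpoint are placeholders.
import litellm

try:
    litellm.completion(
        model="azure/chatgpt-v-2",
        messages=[{"role": "user", "content": "hello"}],
        api_key="sk-placeholder",
        api_base="https://example-endpoint.openai.azure.com",
        timeout=0.000000001,
    )
except litellm.Timeout as e:
    # With this change the message reports the configured timeout and how long
    # the request actually ran before failing.
    print(e)
```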
caching_routes.py

@@ -1,12 +1,15 @@
-import copy
+from typing import Any, Dict

 from fastapi import APIRouter, Depends, HTTPException, Request

 import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.caching.caching import RedisCache
+from litellm.litellm_core_utils.sensitive_data_masker import SensitiveDataMasker
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth

+masker = SensitiveDataMasker()
+
 router = APIRouter(
     prefix="/cache",
     tags=["caching"],
@@ -21,27 +24,30 @@ async def cache_ping():
     """
     Endpoint for checking if cache can be pinged
     """
-    litellm_cache_params = {}
-    specific_cache_params = {}
+    litellm_cache_params: Dict[str, Any] = {}
+    specific_cache_params: Dict[str, Any] = {}
     try:
         if litellm.cache is None:
             raise HTTPException(
                 status_code=503, detail="Cache not initialized. litellm.cache is None"
             )
+        litellm_cache_params = {}
+        specific_cache_params = {}
         for k, v in vars(litellm.cache).items():
             try:
                 if k == "cache":
                     continue
-                litellm_cache_params[k] = str(copy.deepcopy(v))
+                litellm_cache_params[k] = v
             except Exception:
                 litellm_cache_params[k] = "<unable to copy or convert>"
         for k, v in vars(litellm.cache.cache).items():
             try:
-                specific_cache_params[k] = str(v)
+                specific_cache_params[k] = v
             except Exception:
                 specific_cache_params[k] = "<unable to copy or convert>"
+        litellm_cache_params = masker.mask_dict(litellm_cache_params)
+        specific_cache_params = masker.mask_dict(specific_cache_params)
         if litellm.cache.type == "redis":
             # ping the redis cache
             ping_response = await litellm.cache.ping()
@@ -56,6 +62,7 @@ async def cache_ping():
             messages=[{"role": "user", "content": "test from litellm"}],
         )
         verbose_proxy_logger.debug("/cache/ping: done with set_cache()")
+
         return {
             "status": "healthy",
             "cache_type": litellm.cache.type,
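Because `mask_dict` already leaves primitives alone, stringifies anything else, and tolerates unserializable values, the route no longer needs the `copy.deepcopy`/`str()` calls it dropped above. A minimal sketch of what the masking does to the cache client's attributes (the class and values below are made up; the real route feeds it `vars(litellm.cache.cache)`):

```python
# Illustrative sketch only: mimics how /cache/ping masks the cache client's
# attributes before returning them. FakeRedisCache is a made-up stand-in.
from litellm.litellm_core_utils.sensitive_data_masker import SensitiveDataMasker


class FakeRedisCache:
    def __init__(self) -> None:
        self.host = "my-redis.internal"
        self.port = 6379
        self.password = "very_secret_pass"


masker = SensitiveDataMasker()
specific_cache_params = {k: v for k, v in vars(FakeRedisCache()).items()}

print(masker.mask_dict(specific_cache_params))
# {'host': 'my-redis.internal', 'port': 6379, 'password': 'very********pass'}
```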
@@ -13,6 +13,7 @@ IGNORE_FUNCTIONS = [
     "add_object_type",
     "strip_field",
     "_transform_prompt",
+    "mask_dict",
 ]
