[Bug Fix] Azure Blob Storage fixes (#10059)

* Simple fix for #9339 - upgrade the underlying library and cache the azure storage client (#9965)

* fix - use constants for caching the azure storage client

---------

Co-authored-by: Adrian Lyjak <adrian@chatmeter.com>
Commit: a9e8a36f89 (parent: a743b6fc1f)
Author: Ishaan Jaff
Date: 2025-04-16 09:47:10 -07:00 (committed by GitHub)
4 changed files with 38 additions and 15 deletions
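
The heart of the change is a time-to-live (TTL) cache around the async DataLakeServiceClient: instead of constructing a new client (and a new connection pool) on every upload, the logger reuses one client and recycles it once per hour. A minimal sketch of that pattern, using hypothetical names rather than LiteLLM's actual classes:

```python
import time
from typing import Callable, Optional


class TTLCachedClient:
    """Hypothetical helper: lazily build an async client, recycle it after a TTL."""

    def __init__(self, factory: Callable, ttl_seconds: float = 3600):
        self._factory = factory  # zero-arg callable that builds a fresh client
        self._ttl = ttl_seconds  # how long a client may be reused
        self._client = None      # currently cached client, if any
        self._expires_at: Optional[float] = None

    async def get(self):
        # Close and drop the cached client once its TTL has elapsed,
        # so the underlying connection pool is periodically reset.
        if (
            self._client is not None
            and self._expires_at is not None
            and time.time() > self._expires_at
        ):
            await self._client.close()
            self._client = None
        if self._client is None:
            self._client = self._factory()
            self._expires_at = time.time() + self._ttl
        return self._client
```

The one-hour TTL itself is promoted from a module-private constant in http_handler.py to litellm/constants.py, so the Azure logger and the httpx client pooling can share it.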

litellm/constants.py

@@ -21,6 +21,9 @@ DEFAULT_MAX_TOKENS = 256 # used when providers need a default
 MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB
 SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
+########## Networking constants ##############################################################
+_DEFAULT_TTL_FOR_HTTPX_CLIENTS = 3600 # 1 hour, re-use the same httpx client for 1 hour
+
 ########### v2 Architecture constants for managing writing updates to the database ###########
 REDIS_UPDATE_BUFFER_KEY = "litellm_spend_update_buffer"
 REDIS_DAILY_SPEND_UPDATE_BUFFER_KEY = "litellm_daily_spend_update_buffer"
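
Any module can now import the shared TTL instead of redefining it locally; for example (a trivial check, mirroring the imports in the diffs below):

```python
from litellm.constants import _DEFAULT_TTL_FOR_HTTPX_CLIENTS

# One hour, per the comment above; both the httpx handler and the
# Azure Blob Storage logger key their client lifetimes off this value.
assert _DEFAULT_TTL_FOR_HTTPX_CLIENTS == 3600
```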

litellm/integrations/azure_storage/azure_storage.py

@@ -1,12 +1,13 @@
 import asyncio
 import json
 import os
+import time
 import uuid
 from datetime import datetime, timedelta
 from typing import List, Optional

 from litellm._logging import verbose_logger
-from litellm.constants import AZURE_STORAGE_MSFT_VERSION
+from litellm.constants import _DEFAULT_TTL_FOR_HTTPX_CLIENTS, AZURE_STORAGE_MSFT_VERSION
 from litellm.integrations.custom_batch_logger import CustomBatchLogger
 from litellm.llms.azure.common_utils import get_azure_ad_token_from_entra_id
 from litellm.llms.custom_httpx.http_handler import (
@@ -48,14 +49,17 @@ class AzureBlobStorageLogger(CustomBatchLogger):
                 "Missing required environment variable: AZURE_STORAGE_FILE_SYSTEM"
             )
         self.azure_storage_file_system: str = _azure_storage_file_system
+        self._service_client = None
+        # Time at which the Azure service client expires, so the connection pool can be reset and kept fresh
+        self._service_client_timeout: Optional[float] = None

         # Internal variables used for Token based authentication
-        self.azure_auth_token: Optional[
-            str
-        ] = None  # the Azure AD token to use for Azure Storage API requests
-        self.token_expiry: Optional[
-            datetime
-        ] = None  # the expiry time of the current Azure AD token
+        self.azure_auth_token: Optional[str] = (
+            None  # the Azure AD token to use for Azure Storage API requests
+        )
+        self.token_expiry: Optional[datetime] = (
+            None  # the expiry time of the current Azure AD token
+        )

         asyncio.create_task(self.periodic_flush())
         self.flush_lock = asyncio.Lock()
@@ -324,6 +328,25 @@ class AzureBlobStorageLogger(CustomBatchLogger):
                 f"AzureBlobStorageLogger is only available for premium users. {CommonProxyErrors.not_premium_user}"
             )

+    async def get_service_client(self):
+        from azure.storage.filedatalake.aio import DataLakeServiceClient
+
+        # Expire the cached client once its TTL has elapsed, so the connection pool is reset and stays fresh
+        if (
+            self._service_client_timeout
+            and self._service_client
+            and self._service_client_timeout < time.time()
+        ):
+            await self._service_client.close()
+            self._service_client = None
+        if not self._service_client:
+            self._service_client = DataLakeServiceClient(
+                account_url=f"https://{self.azure_storage_account_name}.dfs.core.windows.net",
+                credential=self.azure_storage_account_key,
+            )
+            self._service_client_timeout = time.time() + _DEFAULT_TTL_FOR_HTTPX_CLIENTS
+        return self._service_client
+
     async def upload_to_azure_data_lake_with_azure_account_key(
         self, payload: StandardLoggingPayload
     ):
@@ -332,13 +355,10 @@ class AzureBlobStorageLogger(CustomBatchLogger):
         This is used when Azure Storage Account Key is set - Azure Storage Account Key does not work directly with the Azure REST API
         """
         from azure.storage.filedatalake.aio import DataLakeServiceClient

-        # Create an async service client
-        service_client = DataLakeServiceClient(
-            account_url=f"https://{self.azure_storage_account_name}.dfs.core.windows.net",
-            credential=self.azure_storage_account_key,
-        )
+        # Get the cached async service client
+        service_client = await self.get_service_client()

         # Get file system client
         file_system_client = service_client.get_file_system_client(
             file_system=self.azure_storage_file_system
litellm/llms/custom_httpx/http_handler.py

@@ -8,6 +8,7 @@ import httpx
 from httpx import USE_CLIENT_DEFAULT, AsyncHTTPTransport, HTTPTransport

 import litellm
+from litellm.constants import _DEFAULT_TTL_FOR_HTTPX_CLIENTS
 from litellm.litellm_core_utils.logging_utils import track_llm_api_timing
 from litellm.types.llms.custom_http import *
@@ -31,7 +32,6 @@ headers = {
 # https://www.python-httpx.org/advanced/timeouts
 _DEFAULT_TIMEOUT = httpx.Timeout(timeout=5.0, connect=5.0)
-_DEFAULT_TTL_FOR_HTTPX_CLIENTS = 3600 # 1 hour, re-use the same httpx client for 1 hour


 def mask_sensitive_info(error_message):

requirements.txt

@@ -29,7 +29,7 @@ python-multipart==0.0.18 # admin UI
 Pillow==11.0.0
 azure-ai-contentsafety==1.0.0 # for azure content safety
 azure-identity==1.16.1 # for azure content safety
-azure-storage-file-datalake==12.15.0 # for azure blob storage logging
+azure-storage-file-datalake==12.20.0 # for azure blob storage logging
 opentelemetry-api==1.25.0
 opentelemetry-sdk==1.25.0
 opentelemetry-exporter-otlp==1.25.0
@@ -51,4 +51,4 @@ tenacity==8.2.3 # for retrying requests, when litellm.num_retries set
 pydantic==2.10.2 # proxy + openai req.
 jsonschema==4.22.0 # validating json schema
 websockets==13.1.0 # for realtime API
-####
+####
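
To verify that an environment actually picked up the pinned SDK bump, a quick standard-library check:

```python
from importlib.metadata import version

# Expect "12.20.0" once this requirements file has been installed
print(version("azure-storage-file-datalake"))
```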