(feat) proxy Azure Blob Storage - Add support for AZURE_STORAGE_ACCOUNT_KEY Auth (#7280)

* add upload_to_azure_data_lake_with_azure_account_key

* async_upload_payload_to_azure_blob_storage

* docs add AZURE_STORAGE_ACCOUNT_KEY

* add azure-storage-file-datalake
This commit is contained in:
Ishaan Jaff 2024-12-17 17:35:45 -08:00 committed by GitHub
parent c6a2130510
commit f3c546b79e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 113 additions and 41 deletions

View file

@ -317,6 +317,7 @@ router_settings:
| AZURE_CLIENT_SECRET | Client secret for Azure services | AZURE_CLIENT_SECRET | Client secret for Azure services
| AZURE_FEDERATED_TOKEN_FILE | File path to Azure federated token | AZURE_FEDERATED_TOKEN_FILE | File path to Azure federated token
| AZURE_KEY_VAULT_URI | URI for Azure Key Vault | AZURE_KEY_VAULT_URI | URI for Azure Key Vault
| AZURE_STORAGE_ACCOUNT_KEY | The Azure Storage Account Key to use for Authentication to Azure Blob Storage logging
| AZURE_STORAGE_ACCOUNT_NAME | Name of the Azure Storage Account to use for logging to Azure Blob Storage | AZURE_STORAGE_ACCOUNT_NAME | Name of the Azure Storage Account to use for logging to Azure Blob Storage
| AZURE_STORAGE_FILE_SYSTEM | Name of the Azure Storage File System to use for logging to Azure Blob Storage. (Typically the Container name) | AZURE_STORAGE_FILE_SYSTEM | Name of the Azure Storage File System to use for logging to Azure Blob Storage. (Typically the Container name)
| AZURE_STORAGE_TENANT_ID | The Application Tenant ID to use for Authentication to Azure Blob Storage logging | AZURE_STORAGE_TENANT_ID | The Application Tenant ID to use for Authentication to Azure Blob Storage logging

View file

@ -950,8 +950,15 @@ litellm_settings:
2. Set required env variables 2. Set required env variables
```shell ```shell
# Required Environment Variables for Azure Storage
AZURE_STORAGE_ACCOUNT_NAME="litellm2" # The name of the Azure Storage Account to use for logging AZURE_STORAGE_ACCOUNT_NAME="litellm2" # The name of the Azure Storage Account to use for logging
AZURE_STORAGE_FILE_SYSTEM="litellm-logs" # The name of the Azure Storage File System to use for logging. (Typically the Container name) AZURE_STORAGE_FILE_SYSTEM="litellm-logs" # The name of the Azure Storage File System to use for logging. (Typically the Container name)
# Authentication Variables
# Option 1: Use Storage Account Key
AZURE_STORAGE_ACCOUNT_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" # The Azure Storage Account Key to use for Authentication
# Option 2: Use Tenant ID + Client ID + Client Secret
AZURE_STORAGE_TENANT_ID="985efd7cxxxxxxxxxx" # The Application Tenant ID to use for Authentication AZURE_STORAGE_TENANT_ID="985efd7cxxxxxxxxxx" # The Application Tenant ID to use for Authentication
AZURE_STORAGE_CLIENT_ID="abe66585xxxxxxxxxx" # The Application Client ID to use for Authentication AZURE_STORAGE_CLIENT_ID="abe66585xxxxxxxxxx" # The Application Client ID to use for Authentication
AZURE_STORAGE_CLIENT_SECRET="uMS8Qxxxxxxxxxx" # The Application Client Secret to use for Authentication AZURE_STORAGE_CLIENT_SECRET="uMS8Qxxxxxxxxxx" # The Application Client Secret to use for Authentication

View file

@ -41,35 +41,22 @@ class AzureBlobStorageLogger(CustomBatchLogger):
verbose_logger.debug( verbose_logger.debug(
"AzureBlobStorageLogger: in init azure blob storage logger" "AzureBlobStorageLogger: in init azure blob storage logger"
) )
# check if the correct env variables are set
_tenant_id = os.getenv("AZURE_STORAGE_TENANT_ID")
if not _tenant_id:
raise ValueError(
"Missing required environment variable: AZURE_STORAGE_TENANT_ID"
)
self.tenant_id: str = _tenant_id
_client_id = os.getenv("AZURE_STORAGE_CLIENT_ID") # Env Variables used for Azure Storage Authentication
if not _client_id: self.tenant_id = os.getenv("AZURE_STORAGE_TENANT_ID")
raise ValueError( self.client_id = os.getenv("AZURE_STORAGE_CLIENT_ID")
"Missing required environment variable: AZURE_STORAGE_CLIENT_ID" self.client_secret = os.getenv("AZURE_STORAGE_CLIENT_SECRET")
self.azure_storage_account_key: Optional[str] = os.getenv(
"AZURE_STORAGE_ACCOUNT_KEY"
) )
self.client_id: str = _client_id
_client_secret = os.getenv("AZURE_STORAGE_CLIENT_SECRET")
if not _client_secret:
raise ValueError(
"Missing required environment variable: AZURE_STORAGE_CLIENT_SECRET"
)
self.client_secret: str = _client_secret
# Required Env Variables for Azure Storage
_azure_storage_account_name = os.getenv("AZURE_STORAGE_ACCOUNT_NAME") _azure_storage_account_name = os.getenv("AZURE_STORAGE_ACCOUNT_NAME")
if not _azure_storage_account_name: if not _azure_storage_account_name:
raise ValueError( raise ValueError(
"Missing required environment variable: AZURE_STORAGE_ACCOUNT_NAME" "Missing required environment variable: AZURE_STORAGE_ACCOUNT_NAME"
) )
self.azure_storage_account_name: str = _azure_storage_account_name self.azure_storage_account_name: str = _azure_storage_account_name
_azure_storage_file_system = os.getenv("AZURE_STORAGE_FILE_SYSTEM") _azure_storage_file_system = os.getenv("AZURE_STORAGE_FILE_SYSTEM")
if not _azure_storage_file_system: if not _azure_storage_file_system:
raise ValueError( raise ValueError(
@ -77,6 +64,7 @@ class AzureBlobStorageLogger(CustomBatchLogger):
) )
self.azure_storage_file_system: str = _azure_storage_file_system self.azure_storage_file_system: str = _azure_storage_file_system
# Internal variables used for Token based authentication
self.azure_auth_token: Optional[str] = ( self.azure_auth_token: Optional[str] = (
None # the Azure AD token to use for Azure Storage API requests None # the Azure AD token to use for Azure Storage API requests
) )
@ -162,9 +150,6 @@ class AzureBlobStorageLogger(CustomBatchLogger):
len(self.log_queue), len(self.log_queue),
) )
# Get a valid token instead of always requesting a new one
await self.set_valid_azure_ad_token()
for payload in self.log_queue: for payload in self.log_queue:
await self.async_upload_payload_to_azure_blob_storage(payload=payload) await self.async_upload_payload_to_azure_blob_storage(payload=payload)
@ -183,10 +168,20 @@ class AzureBlobStorageLogger(CustomBatchLogger):
3. Flush the data 3. Flush the data
""" """
try: try:
if self.azure_storage_account_key:
await self.upload_to_azure_data_lake_with_azure_account_key(
payload=payload
)
else:
# Get a valid token instead of always requesting a new one
await self.set_valid_azure_ad_token()
async_client = get_async_httpx_client( async_client = get_async_httpx_client(
llm_provider=httpxSpecialProvider.LoggingCallback llm_provider=httpxSpecialProvider.LoggingCallback
) )
json_payload = json.dumps(payload) + "\n" # Add newline for each log entry json_payload = (
json.dumps(payload) + "\n"
) # Add newline for each log entry
payload_bytes = json_payload.encode("utf-8") payload_bytes = json_payload.encode("utf-8")
filename = f"{payload.get('id') or str(uuid.uuid4())}.json" filename = f"{payload.get('id') or str(uuid.uuid4())}.json"
base_url = f"https://{self.azure_storage_account_name}.dfs.core.windows.net/{self.azure_storage_file_system}/{filename}" base_url = f"https://{self.azure_storage_account_name}.dfs.core.windows.net/{self.azure_storage_file_system}/{filename}"
@ -285,9 +280,9 @@ class AzureBlobStorageLogger(CustomBatchLogger):
def get_azure_ad_token_from_azure_storage( def get_azure_ad_token_from_azure_storage(
self, self,
tenant_id: str, tenant_id: Optional[str],
client_id: str, client_id: Optional[str],
client_secret: str, client_secret: Optional[str],
) -> str: ) -> str:
""" """
Gets Azure AD token to use for Azure Storage API requests Gets Azure AD token to use for Azure Storage API requests
@ -299,6 +294,19 @@ class AzureBlobStorageLogger(CustomBatchLogger):
client_id, client_id,
client_secret, client_secret,
) )
if tenant_id is None:
raise ValueError(
"Missing required environment variable: AZURE_STORAGE_TENANT_ID"
)
if client_id is None:
raise ValueError(
"Missing required environment variable: AZURE_STORAGE_CLIENT_ID"
)
if client_secret is None:
raise ValueError(
"Missing required environment variable: AZURE_STORAGE_CLIENT_SECRET"
)
token_provider = get_azure_ad_token_from_entrata_id( token_provider = get_azure_ad_token_from_entrata_id(
tenant_id=tenant_id, tenant_id=tenant_id,
client_id=client_id, client_id=client_id,
@ -331,3 +339,58 @@ class AzureBlobStorageLogger(CustomBatchLogger):
raise ValueError( raise ValueError(
f"AzureBlobStorageLogger is only available for premium users. {CommonProxyErrors.not_premium_user}" f"AzureBlobStorageLogger is only available for premium users. {CommonProxyErrors.not_premium_user}"
) )
async def upload_to_azure_data_lake_with_azure_account_key(
    self, payload: StandardLoggingPayload
):
    """
    Upload a single logging payload to Azure Data Lake using the Azure SDK.

    This path is used when AZURE_STORAGE_ACCOUNT_KEY is set - the Storage
    Account Key does not work directly with the Azure REST API, so the
    `azure-storage-file-datalake` SDK client is used instead.

    The payload is written as JSON to `<YYYY-MM-DD>/<id>.json` inside the
    configured file system, where `<id>` is `payload["id"]` or a fresh
    UUID4 when the payload has no id.

    Args:
        payload: The logging payload to serialize and upload.

    Errors raised while creating the directory/file or writing the data are
    logged via `verbose_logger.exception` and not re-raised.
    """
    from datetime import datetime

    from azure.storage.filedatalake.aio import DataLakeServiceClient

    # Use the service client as an async context manager so the underlying
    # HTTP transport is closed after every upload instead of being leaked.
    async with DataLakeServiceClient(
        account_url=f"https://{self.azure_storage_account_name}.dfs.core.windows.net",
        credential=self.azure_storage_account_key,
    ) as service_client:
        # Get file system client
        file_system_client = service_client.get_file_system_client(
            file_system=self.azure_storage_file_system
        )

        try:
            # Create directory with today's date
            today = datetime.now().strftime("%Y-%m-%d")
            directory_client = file_system_client.get_directory_client(today)

            # check if the directory exists
            if not await directory_client.exists():
                await directory_client.create_directory()
                verbose_logger.debug(f"Created directory: {today}")

            # Create a file client
            file_name = f"{payload.get('id') or str(uuid.uuid4())}.json"
            file_client = directory_client.get_file_client(file_name)

            # Create the file
            await file_client.create_file()

            # Content to append
            content = json.dumps(payload).encode("utf-8")
            content_length = len(content)

            # Append content to the file
            await file_client.append_data(
                data=content, offset=0, length=content_length
            )

            # Flush the content to finalize the file. `offset` must equal the
            # total file length after the append, per the SDK contract.
            await file_client.flush_data(offset=content_length)

            verbose_logger.debug(
                f"Successfully uploaded and wrote to {today}/{file_name}"
            )

        except Exception as e:
            verbose_logger.exception(f"Error occurred: {str(e)}")

View file

@ -27,6 +27,7 @@ python-multipart==0.0.9 # admin UI
Pillow==10.3.0 Pillow==10.3.0
azure-ai-contentsafety==1.0.0 # for azure content safety azure-ai-contentsafety==1.0.0 # for azure content safety
azure-identity==1.16.1 # for azure content safety azure-identity==1.16.1 # for azure content safety
azure-storage-file-datalake==12.15.0 # for azure blob storage logging
opentelemetry-api==1.25.0 opentelemetry-api==1.25.0
opentelemetry-sdk==1.25.0 opentelemetry-sdk==1.25.0
opentelemetry-exporter-otlp==1.25.0 opentelemetry-exporter-otlp==1.25.0