diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md
index 00f6981a52..17fe69bad2 100644
--- a/docs/my-website/docs/proxy/config_settings.md
+++ b/docs/my-website/docs/proxy/config_settings.md
@@ -317,6 +317,7 @@ router_settings:
 | AZURE_CLIENT_SECRET | Client secret for Azure services
 | AZURE_FEDERATED_TOKEN_FILE | File path to Azure federated token
 | AZURE_KEY_VAULT_URI | URI for Azure Key Vault
+| AZURE_STORAGE_ACCOUNT_KEY | The Azure Storage Account Key to use for Authentication to Azure Blob Storage logging
 | AZURE_STORAGE_ACCOUNT_NAME | Name of the Azure Storage Account to use for logging to Azure Blob Storage
 | AZURE_STORAGE_FILE_SYSTEM | Name of the Azure Storage File System to use for logging to Azure Blob Storage. (Typically the Container name)
 | AZURE_STORAGE_TENANT_ID | The Application Tenant ID to use for Authentication to Azure Blob Storage logging
diff --git a/docs/my-website/docs/proxy/logging.md b/docs/my-website/docs/proxy/logging.md
index 27376e2cd4..d665b4a331 100644
--- a/docs/my-website/docs/proxy/logging.md
+++ b/docs/my-website/docs/proxy/logging.md
@@ -950,8 +950,15 @@ litellm_settings:
 2. Set required env variables
 
 ```shell
+# Required Environment Variables for Azure Storage
 AZURE_STORAGE_ACCOUNT_NAME="litellm2" # The name of the Azure Storage Account to use for logging
 AZURE_STORAGE_FILE_SYSTEM="litellm-logs" # The name of the Azure Storage File System to use for logging. (Typically the Container name)
+
+# Authentication Variables
+# Option 1: Use Storage Account Key
+AZURE_STORAGE_ACCOUNT_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" # The Azure Storage Account Key to use for Authentication
+
+# Option 2: Use Tenant ID + Client ID + Client Secret
 AZURE_STORAGE_TENANT_ID="985efd7cxxxxxxxxxx" # The Application Tenant ID to use for Authentication
 AZURE_STORAGE_CLIENT_ID="abe66585xxxxxxxxxx" # The Application Client ID to use for Authentication
 AZURE_STORAGE_CLIENT_SECRET="uMS8Qxxxxxxxxxx" # The Application Client Secret to use for Authentication
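The logging.md changes above document two mutually exclusive ways to authenticate: a storage account key, or a tenant ID / client ID / client secret trio. As an illustration only (not part of the patch), the sketch below mirrors that documented precedence in plain Python; the helper name `resolve_azure_storage_auth` is hypothetical.

```python
import os
from typing import Dict, Optional


def resolve_azure_storage_auth() -> Dict[str, Optional[str]]:
    """Hypothetical helper, illustrative only: the storage account key
    (Option 1) takes precedence; otherwise the tenant/client/secret trio
    (Option 2) is expected, mirroring the documented behavior."""
    account_key = os.getenv("AZURE_STORAGE_ACCOUNT_KEY")
    if account_key:
        return {"mode": "account_key", "account_key": account_key}
    return {
        "mode": "azure_ad",
        "tenant_id": os.getenv("AZURE_STORAGE_TENANT_ID"),
        "client_id": os.getenv("AZURE_STORAGE_CLIENT_ID"),
        "client_secret": os.getenv("AZURE_STORAGE_CLIENT_SECRET"),
    }
```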
diff --git a/litellm/integrations/azure_storage/azure_storage.py b/litellm/integrations/azure_storage/azure_storage.py
index 0982268b44..977a253f69 100644
--- a/litellm/integrations/azure_storage/azure_storage.py
+++ b/litellm/integrations/azure_storage/azure_storage.py
@@ -41,35 +41,22 @@ class AzureBlobStorageLogger(CustomBatchLogger):
         verbose_logger.debug(
             "AzureBlobStorageLogger: in init azure blob storage logger"
         )
-        # check if the correct env variables are set
-        _tenant_id = os.getenv("AZURE_STORAGE_TENANT_ID")
-        if not _tenant_id:
-            raise ValueError(
-                "Missing required environment variable: AZURE_STORAGE_TENANT_ID"
-            )
-        self.tenant_id: str = _tenant_id
-        _client_id = os.getenv("AZURE_STORAGE_CLIENT_ID")
-        if not _client_id:
-            raise ValueError(
-                "Missing required environment variable: AZURE_STORAGE_CLIENT_ID"
-            )
-        self.client_id: str = _client_id
-
-        _client_secret = os.getenv("AZURE_STORAGE_CLIENT_SECRET")
-        if not _client_secret:
-            raise ValueError(
-                "Missing required environment variable: AZURE_STORAGE_CLIENT_SECRET"
-            )
-        self.client_secret: str = _client_secret
+        # Env Variables used for Azure Storage Authentication
+        self.tenant_id = os.getenv("AZURE_STORAGE_TENANT_ID")
+        self.client_id = os.getenv("AZURE_STORAGE_CLIENT_ID")
+        self.client_secret = os.getenv("AZURE_STORAGE_CLIENT_SECRET")
+        self.azure_storage_account_key: Optional[str] = os.getenv(
+            "AZURE_STORAGE_ACCOUNT_KEY"
+        )
 
+        # Required Env Variables for Azure Storage
         _azure_storage_account_name = os.getenv("AZURE_STORAGE_ACCOUNT_NAME")
         if not _azure_storage_account_name:
             raise ValueError(
                 "Missing required environment variable: AZURE_STORAGE_ACCOUNT_NAME"
             )
         self.azure_storage_account_name: str = _azure_storage_account_name
-
         _azure_storage_file_system = os.getenv("AZURE_STORAGE_FILE_SYSTEM")
         if not _azure_storage_file_system:
             raise ValueError(
@@ -77,6 +64,7 @@ class AzureBlobStorageLogger(CustomBatchLogger):
             )
         self.azure_storage_file_system: str = _azure_storage_file_system
 
+        # Internal variables used for Token based authentication
         self.azure_auth_token: Optional[str] = (
             None  # the Azure AD token to use for Azure Storage API requests
         )
@@ -162,9 +150,6 @@ class AzureBlobStorageLogger(CustomBatchLogger):
                 len(self.log_queue),
             )
 
-            # Get a valid token instead of always requesting a new one
-            await self.set_valid_azure_ad_token()
-
             for payload in self.log_queue:
                 await self.async_upload_payload_to_azure_blob_storage(payload=payload)
 
@@ -183,22 +168,32 @@ class AzureBlobStorageLogger(CustomBatchLogger):
         3. Flush the data
         """
         try:
-            async_client = get_async_httpx_client(
-                llm_provider=httpxSpecialProvider.LoggingCallback
-            )
-            json_payload = json.dumps(payload) + "\n"  # Add newline for each log entry
-            payload_bytes = json_payload.encode("utf-8")
-            filename = f"{payload.get('id') or str(uuid.uuid4())}.json"
-            base_url = f"https://{self.azure_storage_account_name}.dfs.core.windows.net/{self.azure_storage_file_system}/{filename}"
-            # Execute the 3-step upload process
-            await self._create_file(async_client, base_url)
-            await self._append_data(async_client, base_url, json_payload)
-            await self._flush_data(async_client, base_url, len(payload_bytes))
+            if self.azure_storage_account_key:
+                await self.upload_to_azure_data_lake_with_azure_account_key(
+                    payload=payload
+                )
+            else:
+                # Get a valid token instead of always requesting a new one
+                await self.set_valid_azure_ad_token()
+                async_client = get_async_httpx_client(
+                    llm_provider=httpxSpecialProvider.LoggingCallback
+                )
+                json_payload = (
+                    json.dumps(payload) + "\n"
+                )  # Add newline for each log entry
+                payload_bytes = json_payload.encode("utf-8")
+                filename = f"{payload.get('id') or str(uuid.uuid4())}.json"
+                base_url = f"https://{self.azure_storage_account_name}.dfs.core.windows.net/{self.azure_storage_file_system}/{filename}"
 
-            verbose_logger.debug(
-                f"Successfully uploaded log to Azure Blob Storage: {filename}"
-            )
+                # Execute the 3-step upload process
+                await self._create_file(async_client, base_url)
+                await self._append_data(async_client, base_url, json_payload)
+                await self._flush_data(async_client, base_url, len(payload_bytes))
+
+                verbose_logger.debug(
+                    f"Successfully uploaded log to Azure Blob Storage: {filename}"
+                )
         except Exception as e:
             verbose_logger.exception(f"Error uploading to Azure Blob Storage: {str(e)}")
@@ -285,9 +280,9 @@ class AzureBlobStorageLogger(CustomBatchLogger):
 
     def get_azure_ad_token_from_azure_storage(
         self,
-        tenant_id: str,
-        client_id: str,
-        client_secret: str,
+        tenant_id: Optional[str],
+        client_id: Optional[str],
+        client_secret: Optional[str],
     ) -> str:
         """
         Gets Azure AD token to use for Azure Storage API requests
@@ -299,6 +294,19 @@ class AzureBlobStorageLogger(CustomBatchLogger):
             client_id,
             client_secret,
         )
+        if tenant_id is None:
+            raise ValueError(
+                "Missing required environment variable: AZURE_STORAGE_TENANT_ID"
+            )
+        if client_id is None:
+            raise ValueError(
+                "Missing required environment variable: AZURE_STORAGE_CLIENT_ID"
+            )
+        if client_secret is None:
+            raise ValueError(
+                "Missing required environment variable: AZURE_STORAGE_CLIENT_SECRET"
+            )
+
         token_provider = get_azure_ad_token_from_entrata_id(
             tenant_id=tenant_id,
             client_id=client_id,
@@ -331,3 +339,58 @@ class AzureBlobStorageLogger(CustomBatchLogger):
             raise ValueError(
                 f"AzureBlobStorageLogger is only available for premium users. {CommonProxyErrors.not_premium_user}"
             )
+
+    async def upload_to_azure_data_lake_with_azure_account_key(
+        self, payload: StandardLoggingPayload
+    ):
+        """
+        Uploads the payload to Azure Data Lake using the Azure SDK
+
+        This is used when the Azure Storage Account Key is set, since the Account Key cannot be used directly with the Azure REST API
+        """
+        from azure.storage.filedatalake.aio import DataLakeServiceClient
+
+        # Create an async service client
+        service_client = DataLakeServiceClient(
+            account_url=f"https://{self.azure_storage_account_name}.dfs.core.windows.net",
+            credential=self.azure_storage_account_key,
+        )
+        # Get file system client
+        file_system_client = service_client.get_file_system_client(
+            file_system=self.azure_storage_file_system
+        )
+
+        try:
+            # Create directory with today's date
+            from datetime import datetime
+
+            today = datetime.now().strftime("%Y-%m-%d")
+            directory_client = file_system_client.get_directory_client(today)
+
+            # check if the directory exists
+            if not await directory_client.exists():
+                await directory_client.create_directory()
+                verbose_logger.debug(f"Created directory: {today}")
+
+            # Create a file client
+            file_name = f"{payload.get('id') or str(uuid.uuid4())}.json"
+            file_client = directory_client.get_file_client(file_name)
+
+            # Create the file
+            await file_client.create_file()
+
+            # Content to append
+            content = json.dumps(payload).encode("utf-8")
+
+            # Append content to the file
+            await file_client.append_data(data=content, offset=0, length=len(content))
+
+            # Flush the content to finalize the file
+            await file_client.flush_data(position=len(content), offset=0)
+
+            verbose_logger.debug(
+                f"Successfully uploaded and wrote to {today}/{file_name}"
+            )
+
+        except Exception as e:
+            verbose_logger.exception(f"Error occurred: {str(e)}")
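To help exercise the new account-key path in `azure_storage.py` above, here is a minimal smoke-test sketch that is not part of the PR. It assumes the account name, file system, and account key environment variables from the docs are set, and it uses the SDK's `upload_data(..., overwrite=True)` convenience call as a simpler stand-in for the logger's explicit create/append/flush sequence.

```python
# Hypothetical smoke test for the account-key path; not part of this PR.
# Assumes AZURE_STORAGE_ACCOUNT_NAME, AZURE_STORAGE_FILE_SYSTEM and
# AZURE_STORAGE_ACCOUNT_KEY are exported, as described in logging.md above.
import asyncio
import json
import os
import uuid

from azure.storage.filedatalake.aio import DataLakeServiceClient


async def upload_sample_log() -> None:
    account_name = os.environ["AZURE_STORAGE_ACCOUNT_NAME"]
    file_system = os.environ["AZURE_STORAGE_FILE_SYSTEM"]
    account_key = os.environ["AZURE_STORAGE_ACCOUNT_KEY"]

    async with DataLakeServiceClient(
        account_url=f"https://{account_name}.dfs.core.windows.net",
        credential=account_key,
    ) as service_client:
        file_system_client = service_client.get_file_system_client(file_system)
        file_client = file_system_client.get_file_client(f"smoke-{uuid.uuid4()}.json")
        body = json.dumps({"id": "smoke-test", "status": "ok"}).encode("utf-8")
        # upload_data creates (or overwrites) the file and flushes it in one call,
        # instead of the explicit create/append/flush sequence the logger uses.
        await file_client.upload_data(body, overwrite=True)


if __name__ == "__main__":
    asyncio.run(upload_sample_log())
```

If this write succeeds with the same credentials the proxy will use, the logger's account-key branch should be able to authenticate as well.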
diff --git a/requirements.txt b/requirements.txt
index 7b1abcdc68..51d3718a52 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,6 +27,7 @@ python-multipart==0.0.9 # admin UI
 Pillow==10.3.0
 azure-ai-contentsafety==1.0.0 # for azure content safety
 azure-identity==1.16.1 # for azure content safety
+azure-storage-file-datalake==12.15.0 # for azure blob storage logging
 opentelemetry-api==1.25.0
 opentelemetry-sdk==1.25.0
 opentelemetry-exporter-otlp==1.25.0
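Because the logger imports `azure.storage.filedatalake` lazily inside `upload_to_azure_data_lake_with_azure_account_key`, a missing install of the dependency pinned above would only surface at upload time. A defensive startup check along these lines could fail faster; this is illustrative only, and the error message wording is an assumption rather than something taken from the PR.

```python
# Illustrative only: fail fast at startup if the optional SDK added in
# requirements.txt is missing, instead of failing at upload time.
try:
    from azure.storage.filedatalake.aio import DataLakeServiceClient  # noqa: F401
except ImportError as err:
    raise ImportError(
        "azure-storage-file-datalake is required for AZURE_STORAGE_ACCOUNT_KEY "
        "based logging; install it with `pip install azure-storage-file-datalake`"
    ) from err
```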