diff --git a/litellm/llms/AzureOpenAI/azure.py b/litellm/llms/AzureOpenAI/azure.py
index adf294f74..34ba7d7e0 100644
--- a/litellm/llms/AzureOpenAI/azure.py
+++ b/litellm/llms/AzureOpenAI/azure.py
@@ -135,6 +135,7 @@ class AzureOpenAIConfig:
             "temperature",
             "n",
             "stream",
+            "stream_options",
             "stop",
             "max_tokens",
             "max_completion_tokens",
@@ -938,6 +939,7 @@ class AzureChatCompletion(BaseLLM):
                 model=model,
                 custom_llm_provider="azure",
                 logging_obj=logging_obj,
+                stream_options=data.get("stream_options", None),
                 _response_headers=process_azure_headers(headers),
             )
             return streamwrapper
@@ -1006,6 +1008,7 @@ class AzureChatCompletion(BaseLLM):
                 model=model,
                 custom_llm_provider="azure",
                 logging_obj=logging_obj,
+                stream_options=data.get("stream_options", None),
                 _response_headers=headers,
             )
             return streamwrapper
         ## DO NOT make this into an async for ... loop, it will yield an async generator, which won't raise errors if the response fails
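
For context, here is a minimal usage sketch of what this patch enables: because `stream_options` is now listed as a supported param and forwarded into `CustomStreamWrapper` for both sync and async Azure streams, a caller can request per-stream token usage the same way as with the native OpenAI provider. The deployment name `my-gpt-4o` is a placeholder, and the sketch assumes `AZURE_API_KEY`, `AZURE_API_BASE`, and `AZURE_API_VERSION` are already set in the environment.

```python
import litellm

# Assumes AZURE_API_KEY, AZURE_API_BASE, and AZURE_API_VERSION are set
# in the environment; "my-gpt-4o" is a hypothetical deployment name.
response = litellm.completion(
    model="azure/my-gpt-4o",
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,
    # With this patch, stream_options is forwarded to CustomStreamWrapper
    # for Azure instead of being treated as unsupported.
    stream_options={"include_usage": True},
)

for chunk in response:
    # Regular chunks carry content deltas.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
    # When include_usage is set, the final chunk carries a usage object.
    if getattr(chunk, "usage", None):
        print("\n", chunk.usage)
```

If this behaves like OpenAI's native `include_usage` option, every chunk before the last reports no usage, and the final chunk has an empty `choices` list alongside the populated `usage` object, which is why the sketch guards both accesses.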