Litellm dev 12 30 2024 p1 (#7480)

* test(azure_openai_o1.py): initial commit with testing for azure openai o1 preview model
* fix(base_llm_unit_tests.py): handle azure o1 preview response format tests — skip, as o1 on azure doesn't support tool calling yet
* fix: initial commit of azure o1 handler using openai caller — simplifies calling + allows fake streaming logic already implemented for openai to just work
* feat(azure/o1_handler.py): fake o1 streaming for azure o1 models — azure does not currently support streaming for o1
* feat(o1_transformation.py): support overriding 'should_fake_stream' on azure/o1 via 'supports_native_streaming' param on model info — enables user to toggle on when azure allows o1 streaming without needing to bump versions
* style(router.py): remove 'give feedback/get help' messaging when router is used — prevents noisy messaging. Closes https://github.com/BerriAI/litellm/issues/5942
* test: fix azure o1 test
* test: fix tests
* fix: fix test
2025-04-26 11:14:04 +00:00 · 2024-12-30 21:52:52 -08:00 · 2024-12-30 21:52:52 -08:00 · 0178e75cd9
commit 0178e75cd9
parent f0ed02d3ee
17 changed files with 273 additions and 141 deletions
--- a/litellm/llms/azure/common_utils.py
+++ b/litellm/llms/azure/common_utils.py
@ -1,7 +1,9 @@
 from typing import Callable, Optional, Union

 import httpx
+from openai import AsyncAzureOpenAI, AzureOpenAI

+import litellm
 from litellm._logging import verbose_logger
 from litellm.llms.base_llm.chat.transformation import BaseLLMException
 from litellm.secret_managers.main import get_secret_str
@ -25,6 +27,39 @@ class AzureOpenAIError(BaseLLMException):
        )


+# Return an Azure OpenAI SDK client: the caller-supplied `client` when one is
+# given, otherwise a new client built from the remaining arguments.
+#
+# Arguments whose value is None are left out of the constructor call.
+# `api_base` is forwarded under the name `azure_endpoint`, and `api_version`
+# falls back to `litellm.AZURE_DEFAULT_API_VERSION` when it was not provided.
+# `_is_async` selects `AsyncAzureOpenAI` over `AzureOpenAI`.
+#
+# NOTE(review): the return type is annotated Optional, but every path visible
+# here returns either the passed-in non-None `client` or a newly constructed
+# one.
+def get_azure_openai_client(
+    api_key: Optional[str],
+    api_base: Optional[str],
+    timeout: Union[float, httpx.Timeout],
+    max_retries: Optional[int],
+    api_version: Optional[str] = None,
+    organization: Optional[str] = None,
+    client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = None,
+    _is_async: bool = False,
+) -> Optional[Union[AzureOpenAI, AsyncAzureOpenAI]]:
+    # Must stay the first statement: taken before any other local is bound, so
+    # the mapping holds only the function's parameters.
+    received_args = locals()
+    openai_client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = None
+    if client is None:
+        # Keyword arguments for the SDK client constructor.
+        data = {}
+        for k, v in received_args.items():
+            # `client` and `_is_async` only steer this function; they are not
+            # forwarded to the constructor.
+            # NOTE(review): `self` is not a parameter of this module-level
+            # function, so that comparison never matches here — presumably
+            # carried over from a method version; confirm before removing.
+            if k == "self" or k == "client" or k == "_is_async":
+                pass
+            # Forwarded under the constructor's `azure_endpoint` keyword.
+            elif k == "api_base" and v is not None:
+                data["azure_endpoint"] = v
+            # Everything else is forwarded as-is, but only when it has a value.
+            elif v is not None:
+                data[k] = v
+        # `api_version` is absent from `data` only when the caller passed None.
+        if "api_version" not in data:
+            data["api_version"] = litellm.AZURE_DEFAULT_API_VERSION
+        # Identity check: only the literal True selects the async client; any
+        # other value (including other truthy ones) yields the sync client.
+        if _is_async is True:
+            openai_client = AsyncAzureOpenAI(**data)
+        else:
+            openai_client = AzureOpenAI(**data)  # type: ignore
+    else:
+        # A pre-built client was supplied: return it unchanged; the other
+        # arguments are not applied to it.
+        openai_client = client
+
+    return openai_client
+
+
 def process_azure_headers(headers: Union[httpx.Headers, dict]) -> dict:
    openai_headers = {}
    if "x-ratelimit-limit-requests" in headers: