diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py index c7613017e..98cc97d53 100644 --- a/litellm/llms/azure.py +++ b/litellm/llms/azure.py @@ -724,16 +724,32 @@ class AzureChatCompletion(BaseLLM): client_session = litellm.aclient_session or httpx.AsyncClient( transport=AsyncCustomHTTPTransport(), # handle dall-e-2 calls ) - client = AsyncAzureOpenAI( - api_version=api_version, - azure_endpoint=api_base, - api_key=api_key, - timeout=timeout, - http_client=client_session, - ) + if "gateway.ai.cloudflare.com" in api_base: + ## build base url - assume api base includes resource name + if not api_base.endswith("/"): + api_base += "/" + api_base += f"{model}" + client = AsyncAzureOpenAI( + base_url=api_base, + api_version=api_version, + api_key=api_key, + timeout=timeout, + http_client=client_session, + ) + model = None + # cloudflare ai gateway, needs model=None + else: + client = AsyncAzureOpenAI( + api_version=api_version, + azure_endpoint=api_base, + api_key=api_key, + timeout=timeout, + http_client=client_session, + ) - if model is None and mode != "image_generation": - raise Exception("model is not set") + # only run this check if it's not cloudflare ai gateway + if model is None and mode != "image_generation": + raise Exception("model is not set") completion = None diff --git a/litellm/tests/test_configs/test_config_no_auth.yaml b/litellm/tests/test_configs/test_config_no_auth.yaml index e3bf91456..be85765a8 100644 --- a/litellm/tests/test_configs/test_config_no_auth.yaml +++ b/litellm/tests/test_configs/test_config_no_auth.yaml @@ -9,6 +9,11 @@ model_list: api_key: os.environ/AZURE_CANADA_API_KEY model: azure/gpt-35-turbo model_name: azure-model +- litellm_params: + api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1 + api_key: os.environ/AZURE_API_KEY + model: azure/chatgpt-v-2 + model_name: azure-cloudflare-model - litellm_params: api_base: https://openai-france-1234.openai.azure.com api_key: os.environ/AZURE_FRANCE_API_KEY diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py index ac4ebb585..294a5a096 100644 --- a/litellm/tests/test_proxy_server.py +++ b/litellm/tests/test_proxy_server.py @@ -45,7 +45,7 @@ def client_no_auth(): filepath = os.path.dirname(os.path.abspath(__file__)) config_fp = f"{filepath}/test_configs/test_config_no_auth.yaml" # initialize can get run in parallel, it sets specific variables for the fast api app, sinc eit gets run in parallel different tests use the wrong variables - initialize(config=config_fp) + initialize(config=config_fp, debug=True) app = FastAPI() app.include_router(router) # Include your router in the test app