diff --git a/docs/my-website/docs/providers/azure.md b/docs/my-website/docs/providers/azure.md index be3401fd2e..8a9603f5ce 100644 --- a/docs/my-website/docs/providers/azure.md +++ b/docs/my-website/docs/providers/azure.md @@ -1,3 +1,8 @@ + +import Image from '@theme/IdealImage'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # Azure OpenAI ## API Keys, Params api_key, api_base, api_version etc can be passed directly to `litellm.completion` - see here or set as `litellm.api_key` params see here @@ -12,7 +17,7 @@ os.environ["AZURE_AD_TOKEN"] = "" os.environ["AZURE_API_TYPE"] = "" ``` -## Usage +## **Usage - LiteLLM Python SDK** Open In Colab @@ -64,6 +69,126 @@ response = litellm.completion( ) ``` + +## **Usage - LiteLLM Proxy Server** + +Here's how to call Azure OpenAI models with the LiteLLM Proxy Server + +### 1. Save key in your environment + +```bash +export AZURE_API_KEY="" +``` + +### 2. Start the proxy + + + + +```yaml +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: azure/chatgpt-v-2 + api_base: https://openai-gpt-4-test-v-1.openai.azure.com/ + api_version: "2023-05-15" + api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. +``` + + + +This uses the `ClientSecretCredential` + +```yaml +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: azure/chatgpt-v-2 + api_base: https://openai-gpt-4-test-v-1.openai.azure.com/ + api_version: "2023-05-15" + tenant_id: os.environ/AZURE_TENANT_ID + client_id: os.environ/AZURE_CLIENT_ID + client_secret: os.environ/AZURE_CLIENT_SECRET +``` + + + + +### 3. Test it + + + + + +```shell +curl --location 'http://0.0.0.0:4000/chat/completions' \ +--header 'Content-Type: application/json' \ +--data ' { + "model": "gpt-3.5-turbo", + "messages": [ + { + "role": "user", + "content": "what llm are you" + } + ] + } +' +``` + + + +```python +import openai +client = openai.OpenAI( + api_key="anything", + base_url="http://0.0.0.0:4000" +) + +response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [ + { + "role": "user", + "content": "this is a test request, write a short poem" + } +]) + +print(response) + +``` + + + +```python +from langchain.chat_models import ChatOpenAI +from langchain.prompts.chat import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate, +) +from langchain.schema import HumanMessage, SystemMessage + +chat = ChatOpenAI( + openai_api_base="http://0.0.0.0:4000", # set openai_api_base to the LiteLLM Proxy + model = "gpt-3.5-turbo", + temperature=0.1 +) + +messages = [ + SystemMessage( + content="You are a helpful assistant that im using to make a test request to." + ), + HumanMessage( + content="test from litellm. tell me why it's amazing in 1 sentence" + ), +] +response = chat(messages) + +print(response) +``` + + + + + ## Azure OpenAI Chat Completion Models :::tip diff --git a/litellm/main.py b/litellm/main.py index cf37d46d15..45e164a899 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -3231,6 +3231,9 @@ def embedding( "cooldown_time", "tags", "azure_ad_token_provider", + "tenant_id", + "client_id", + "client_secret", ] default_params = openai_params + litellm_params non_default_params = { diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 7d9b97246c..c8599d56ef 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -4,8 +4,9 @@ model_list: model: azure/chatgpt-v-2 api_base: https://openai-gpt-4-test-v-1.openai.azure.com/ api_version: "2023-05-15" - azure_ad_token_provider: True - + tenant_id: os.environ/AZURE_TENANT_ID + client_id: os.environ/AZURE_CLIENT_ID + client_secret: os.environ/AZURE_CLIENT_SECRET guardrails: - guardrail_name: "lakera-pre-guard" diff --git a/litellm/router_utils/client_initalization_utils.py b/litellm/router_utils/client_initalization_utils.py index b0b13fac7f..1dda941b42 100644 --- a/litellm/router_utils/client_initalization_utils.py +++ b/litellm/router_utils/client_initalization_utils.py @@ -173,10 +173,13 @@ def set_client(litellm_router_instance: LitellmRouter, model: dict): organization = litellm.get_secret(organization_env_name) litellm_params["organization"] = organization - azure_ad_token_provider = litellm_params.get("azure_ad_token_provider", None) - if azure_ad_token_provider is not None: + if litellm_params.get("tenant_id"): verbose_router_logger.debug("Using Azure AD Token Provider for Azure Auth") - azure_ad_token_provider = get_azure_ad_token_from_entrata_id() + azure_ad_token_provider = get_azure_ad_token_from_entrata_id( + tenant_id=litellm_params.get("tenant_id"), + client_id=litellm_params.get("client_id"), + client_secret=litellm_params.get("client_secret"), + ) if custom_llm_provider == "azure" or custom_llm_provider == "azure_text": if api_base is None or not isinstance(api_base, str): @@ -507,13 +510,37 @@ def set_client(litellm_router_instance: LitellmRouter, model: dict): ) # cache for 1 hr -def get_azure_ad_token_from_entrata_id() -> Callable[[], str]: - from azure.identity import DefaultAzureCredential, get_bearer_token_provider +def get_azure_ad_token_from_entrata_id( + tenant_id: str, client_id: str, client_secret: str +) -> Callable[[], str]: + from azure.identity import ( + ClientSecretCredential, + DefaultAzureCredential, + get_bearer_token_provider, + ) verbose_router_logger.debug("Getting Azure AD Token from Entrata ID") + if tenant_id.startswith("os.environ/"): + tenant_id = litellm.get_secret(tenant_id) + + if client_id.startswith("os.environ/"): + client_id = litellm.get_secret(client_id) + + if client_secret.startswith("os.environ/"): + client_secret = litellm.get_secret(client_secret) + verbose_router_logger.debug( + "tenant_id %s, client_id %s, client_secret %s", + tenant_id, + client_id, + client_secret, + ) + credential = ClientSecretCredential(tenant_id, client_id, client_secret) + + verbose_router_logger.debug("credential %s", credential) + token_provider = get_bearer_token_provider( - DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default" + credential, "https://cognitiveservices.azure.com/.default" ) verbose_router_logger.debug("token_provider %s", token_provider) diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 62a3a5bcd4..21eae868a0 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -1117,6 +1117,9 @@ all_litellm_params = [ "cache_key", "max_retries", "azure_ad_token_provider", + "tenant_id", + "client_id", + "client_secret", ]