mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 19:24:27 +00:00
fix(client_initialization_utils.py): refactor azure client init logic
This commit is contained in:
parent
7696147968
commit
8845f0947d
1 changed files with 69 additions and 266 deletions
|
@ -194,272 +194,6 @@ class InitalizeOpenAISDKClient:
|
||||||
organization_env_name = organization.replace("os.environ/", "")
|
organization_env_name = organization.replace("os.environ/", "")
|
||||||
organization = get_secret_str(organization_env_name)
|
organization = get_secret_str(organization_env_name)
|
||||||
litellm_params["organization"] = organization
|
litellm_params["organization"] = organization
|
||||||
azure_ad_token_provider: Optional[Callable[[], str]] = None
|
|
||||||
# If we have api_key, then we have higher priority
|
|
||||||
if not api_key and litellm_params.get("tenant_id"):
|
|
||||||
verbose_router_logger.debug(
|
|
||||||
"Using Azure AD Token Provider for Azure Auth"
|
|
||||||
)
|
|
||||||
azure_ad_token_provider = get_azure_ad_token_from_entrata_id(
|
|
||||||
tenant_id=litellm_params.get("tenant_id"),
|
|
||||||
client_id=litellm_params.get("client_id"),
|
|
||||||
client_secret=litellm_params.get("client_secret"),
|
|
||||||
)
|
|
||||||
if litellm_params.get("azure_username") and litellm_params.get(
|
|
||||||
"azure_password"
|
|
||||||
):
|
|
||||||
azure_ad_token_provider = get_azure_ad_token_from_username_password(
|
|
||||||
azure_username=litellm_params.get("azure_username"),
|
|
||||||
azure_password=litellm_params.get("azure_password"),
|
|
||||||
client_id=litellm_params.get("client_id"),
|
|
||||||
)
|
|
||||||
|
|
||||||
if custom_llm_provider == "azure" or custom_llm_provider == "azure_text":
|
|
||||||
if api_base is None or not isinstance(api_base, str):
|
|
||||||
filtered_litellm_params = {
|
|
||||||
k: v
|
|
||||||
for k, v in model["litellm_params"].items()
|
|
||||||
if k != "api_key"
|
|
||||||
}
|
|
||||||
_filtered_model = {
|
|
||||||
"model_name": model["model_name"],
|
|
||||||
"litellm_params": filtered_litellm_params,
|
|
||||||
}
|
|
||||||
raise ValueError(
|
|
||||||
f"api_base is required for Azure OpenAI. Set it on your config. Model - {_filtered_model}"
|
|
||||||
)
|
|
||||||
azure_ad_token = litellm_params.get("azure_ad_token")
|
|
||||||
if azure_ad_token is not None:
|
|
||||||
if azure_ad_token.startswith("oidc/"):
|
|
||||||
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
|
|
||||||
elif (
|
|
||||||
not api_key and azure_ad_token_provider is None
|
|
||||||
and litellm.enable_azure_ad_token_refresh is True
|
|
||||||
):
|
|
||||||
try:
|
|
||||||
azure_ad_token_provider = get_azure_ad_token_provider()
|
|
||||||
except ValueError:
|
|
||||||
verbose_router_logger.debug(
|
|
||||||
"Azure AD Token Provider could not be used."
|
|
||||||
)
|
|
||||||
if api_version is None:
|
|
||||||
api_version = os.getenv(
|
|
||||||
"AZURE_API_VERSION", litellm.AZURE_DEFAULT_API_VERSION
|
|
||||||
)
|
|
||||||
|
|
||||||
if "gateway.ai.cloudflare.com" in api_base:
|
|
||||||
if not api_base.endswith("/"):
|
|
||||||
api_base += "/"
|
|
||||||
azure_model = model_name.replace("azure/", "")
|
|
||||||
api_base += f"{azure_model}"
|
|
||||||
cache_key = f"{model_id}_async_client"
|
|
||||||
_client = openai.AsyncAzureOpenAI(
|
|
||||||
api_key=api_key,
|
|
||||||
azure_ad_token=azure_ad_token,
|
|
||||||
azure_ad_token_provider=azure_ad_token_provider,
|
|
||||||
base_url=api_base,
|
|
||||||
api_version=api_version,
|
|
||||||
timeout=timeout, # type: ignore
|
|
||||||
max_retries=max_retries, # type: ignore
|
|
||||||
http_client=httpx.AsyncClient(
|
|
||||||
limits=httpx.Limits(
|
|
||||||
max_connections=1000, max_keepalive_connections=100
|
|
||||||
),
|
|
||||||
verify=litellm.ssl_verify,
|
|
||||||
), # type: ignore
|
|
||||||
)
|
|
||||||
litellm_router_instance.cache.set_cache(
|
|
||||||
key=cache_key,
|
|
||||||
value=_client,
|
|
||||||
ttl=client_ttl,
|
|
||||||
local_only=True,
|
|
||||||
) # cache for 1 hr
|
|
||||||
|
|
||||||
if InitalizeOpenAISDKClient.should_initialize_sync_client(
|
|
||||||
litellm_router_instance=litellm_router_instance
|
|
||||||
):
|
|
||||||
cache_key = f"{model_id}_client"
|
|
||||||
_client = openai.AzureOpenAI( # type: ignore
|
|
||||||
api_key=api_key,
|
|
||||||
azure_ad_token=azure_ad_token,
|
|
||||||
azure_ad_token_provider=azure_ad_token_provider,
|
|
||||||
base_url=api_base,
|
|
||||||
api_version=api_version,
|
|
||||||
timeout=timeout, # type: ignore
|
|
||||||
max_retries=max_retries, # type: ignore
|
|
||||||
http_client=httpx.Client(
|
|
||||||
limits=httpx.Limits(
|
|
||||||
max_connections=1000, max_keepalive_connections=100
|
|
||||||
),
|
|
||||||
verify=litellm.ssl_verify,
|
|
||||||
), # type: ignore
|
|
||||||
)
|
|
||||||
litellm_router_instance.cache.set_cache(
|
|
||||||
key=cache_key,
|
|
||||||
value=_client,
|
|
||||||
ttl=client_ttl,
|
|
||||||
local_only=True,
|
|
||||||
) # cache for 1 hr
|
|
||||||
# streaming clients can have diff timeouts
|
|
||||||
cache_key = f"{model_id}_stream_async_client"
|
|
||||||
_client = openai.AsyncAzureOpenAI( # type: ignore
|
|
||||||
api_key=api_key,
|
|
||||||
azure_ad_token=azure_ad_token,
|
|
||||||
azure_ad_token_provider=azure_ad_token_provider,
|
|
||||||
base_url=api_base,
|
|
||||||
api_version=api_version,
|
|
||||||
timeout=stream_timeout, # type: ignore
|
|
||||||
max_retries=max_retries, # type: ignore
|
|
||||||
http_client=httpx.AsyncClient(
|
|
||||||
limits=httpx.Limits(
|
|
||||||
max_connections=1000, max_keepalive_connections=100
|
|
||||||
),
|
|
||||||
verify=litellm.ssl_verify,
|
|
||||||
), # type: ignore
|
|
||||||
)
|
|
||||||
litellm_router_instance.cache.set_cache(
|
|
||||||
key=cache_key,
|
|
||||||
value=_client,
|
|
||||||
ttl=client_ttl,
|
|
||||||
local_only=True,
|
|
||||||
) # cache for 1 hr
|
|
||||||
|
|
||||||
if InitalizeOpenAISDKClient.should_initialize_sync_client(
|
|
||||||
litellm_router_instance=litellm_router_instance
|
|
||||||
):
|
|
||||||
cache_key = f"{model_id}_stream_client"
|
|
||||||
_client = openai.AzureOpenAI( # type: ignore
|
|
||||||
api_key=api_key,
|
|
||||||
azure_ad_token=azure_ad_token,
|
|
||||||
azure_ad_token_provider=azure_ad_token_provider,
|
|
||||||
base_url=api_base,
|
|
||||||
api_version=api_version,
|
|
||||||
timeout=stream_timeout, # type: ignore
|
|
||||||
max_retries=max_retries, # type: ignore
|
|
||||||
http_client=httpx.Client(
|
|
||||||
limits=httpx.Limits(
|
|
||||||
max_connections=1000, max_keepalive_connections=100
|
|
||||||
),
|
|
||||||
verify=litellm.ssl_verify,
|
|
||||||
), # type: ignore
|
|
||||||
)
|
|
||||||
litellm_router_instance.cache.set_cache(
|
|
||||||
key=cache_key,
|
|
||||||
value=_client,
|
|
||||||
ttl=client_ttl,
|
|
||||||
local_only=True,
|
|
||||||
) # cache for 1 hr
|
|
||||||
else:
|
|
||||||
_api_key = api_key
|
|
||||||
if _api_key is not None and isinstance(_api_key, str):
|
|
||||||
# only show first 5 chars of api_key
|
|
||||||
_api_key = _api_key[:8] + "*" * 15
|
|
||||||
verbose_router_logger.debug(
|
|
||||||
f"Initializing Azure OpenAI Client for {model_name}, Api Base: {str(api_base)}, Api Key:{_api_key}"
|
|
||||||
)
|
|
||||||
azure_client_params = {
|
|
||||||
"api_key": api_key,
|
|
||||||
"azure_endpoint": api_base,
|
|
||||||
"api_version": api_version,
|
|
||||||
"azure_ad_token": azure_ad_token,
|
|
||||||
"azure_ad_token_provider": azure_ad_token_provider,
|
|
||||||
}
|
|
||||||
|
|
||||||
if azure_ad_token_provider is not None:
|
|
||||||
azure_client_params["azure_ad_token_provider"] = (
|
|
||||||
azure_ad_token_provider
|
|
||||||
)
|
|
||||||
from litellm.llms.azure.azure import (
|
|
||||||
select_azure_base_url_or_endpoint,
|
|
||||||
)
|
|
||||||
|
|
||||||
# this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
|
|
||||||
# required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
|
|
||||||
azure_client_params = select_azure_base_url_or_endpoint(
|
|
||||||
azure_client_params
|
|
||||||
)
|
|
||||||
|
|
||||||
cache_key = f"{model_id}_async_client"
|
|
||||||
_client = openai.AsyncAzureOpenAI( # type: ignore
|
|
||||||
**azure_client_params,
|
|
||||||
timeout=timeout, # type: ignore
|
|
||||||
max_retries=max_retries, # type: ignore
|
|
||||||
http_client=httpx.AsyncClient(
|
|
||||||
limits=httpx.Limits(
|
|
||||||
max_connections=1000, max_keepalive_connections=100
|
|
||||||
),
|
|
||||||
verify=litellm.ssl_verify,
|
|
||||||
), # type: ignore
|
|
||||||
)
|
|
||||||
litellm_router_instance.cache.set_cache(
|
|
||||||
key=cache_key,
|
|
||||||
value=_client,
|
|
||||||
ttl=client_ttl,
|
|
||||||
local_only=True,
|
|
||||||
) # cache for 1 hr
|
|
||||||
if InitalizeOpenAISDKClient.should_initialize_sync_client(
|
|
||||||
litellm_router_instance=litellm_router_instance
|
|
||||||
):
|
|
||||||
cache_key = f"{model_id}_client"
|
|
||||||
_client = openai.AzureOpenAI( # type: ignore
|
|
||||||
**azure_client_params,
|
|
||||||
timeout=timeout, # type: ignore
|
|
||||||
max_retries=max_retries, # type: ignore
|
|
||||||
http_client=httpx.Client(
|
|
||||||
limits=httpx.Limits(
|
|
||||||
max_connections=1000, max_keepalive_connections=100
|
|
||||||
),
|
|
||||||
verify=litellm.ssl_verify,
|
|
||||||
), # type: ignore
|
|
||||||
)
|
|
||||||
litellm_router_instance.cache.set_cache(
|
|
||||||
key=cache_key,
|
|
||||||
value=_client,
|
|
||||||
ttl=client_ttl,
|
|
||||||
local_only=True,
|
|
||||||
) # cache for 1 hr
|
|
||||||
|
|
||||||
# streaming clients should have diff timeouts
|
|
||||||
cache_key = f"{model_id}_stream_async_client"
|
|
||||||
_client = openai.AsyncAzureOpenAI( # type: ignore
|
|
||||||
**azure_client_params,
|
|
||||||
timeout=stream_timeout, # type: ignore
|
|
||||||
max_retries=max_retries, # type: ignore
|
|
||||||
http_client=httpx.AsyncClient(
|
|
||||||
limits=httpx.Limits(
|
|
||||||
max_connections=1000, max_keepalive_connections=100
|
|
||||||
),
|
|
||||||
verify=litellm.ssl_verify,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
litellm_router_instance.cache.set_cache(
|
|
||||||
key=cache_key,
|
|
||||||
value=_client,
|
|
||||||
ttl=client_ttl,
|
|
||||||
local_only=True,
|
|
||||||
) # cache for 1 hr
|
|
||||||
|
|
||||||
if InitalizeOpenAISDKClient.should_initialize_sync_client(
|
|
||||||
litellm_router_instance=litellm_router_instance
|
|
||||||
):
|
|
||||||
cache_key = f"{model_id}_stream_client"
|
|
||||||
_client = openai.AzureOpenAI( # type: ignore
|
|
||||||
**azure_client_params,
|
|
||||||
timeout=stream_timeout, # type: ignore
|
|
||||||
max_retries=max_retries, # type: ignore
|
|
||||||
http_client=httpx.Client(
|
|
||||||
limits=httpx.Limits(
|
|
||||||
max_connections=1000, max_keepalive_connections=100
|
|
||||||
),
|
|
||||||
verify=litellm.ssl_verify,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
litellm_router_instance.cache.set_cache(
|
|
||||||
key=cache_key,
|
|
||||||
value=_client,
|
|
||||||
ttl=client_ttl,
|
|
||||||
local_only=True,
|
|
||||||
) # cache for 1 hr
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
_api_key = api_key # type: ignore
|
_api_key = api_key # type: ignore
|
||||||
|
@ -560,3 +294,72 @@ class InitalizeOpenAISDKClient:
|
||||||
ttl=client_ttl,
|
ttl=client_ttl,
|
||||||
local_only=True,
|
local_only=True,
|
||||||
) # cache for 1 hr
|
) # cache for 1 hr
|
||||||
|
|
||||||
|
|
||||||
|
def initialize_azure_sdk_client(
    litellm_params: dict,
    api_key: Optional[str],
    api_base: Optional[str],
    model_name: str,
    api_version: Optional[str],
):
    """Resolve Azure credentials and build the Azure OpenAI client parameters.

    Credential resolution, in the order it is applied:

    1. If no ``api_key`` is given and ``tenant_id``, ``client_id`` and
       ``client_secret`` are all present in ``litellm_params``, a token
       provider is obtained from ``get_azure_ad_token_from_entrata_id``.
    2. If ``azure_username``, ``azure_password`` and ``client_id`` are all
       present, a token provider from
       ``get_azure_ad_token_from_username_password`` replaces any provider
       chosen in step 1 (this step does not look at ``api_key``).
    3. If ``azure_ad_token`` starts with ``"oidc/"`` it is exchanged through
       ``get_azure_ad_token_from_oidc``. Otherwise, when there is no
       ``api_key``, no provider has been chosen yet and
       ``litellm.enable_azure_ad_token_refresh`` is ``True``,
       ``get_azure_ad_token_provider()`` is tried; a ``ValueError`` from it
       is logged at debug level and ignored.

    Args:
        litellm_params: Deployment parameters; only the Azure auth keys listed
            above and ``azure_ad_token`` are read here.
        api_key: Azure OpenAI API key, if any.
        api_base: Value stored under ``"azure_endpoint"`` before
            ``select_azure_base_url_or_endpoint`` is applied.
        model_name: Used only in the debug log message.
        api_version: Azure API version. When ``None`` it falls back to the
            ``AZURE_API_VERSION`` environment variable, then to
            ``litellm.AZURE_DEFAULT_API_VERSION``.

    Returns:
        The result of ``select_azure_base_url_or_endpoint`` applied to a dict
        holding ``api_key``, ``azure_endpoint``, ``api_version``,
        ``azure_ad_token`` and ``azure_ad_token_provider``.
    """
    # NOTE(review): imported at call time in the original as well; presumably
    # this avoids an import cycle with litellm.llms.azure - confirm before
    # hoisting it to module level.
    from litellm.llms.azure.azure import select_azure_base_url_or_endpoint

    azure_ad_token = litellm_params.get("azure_ad_token")
    tenant_id = litellm_params.get("tenant_id")
    client_id = litellm_params.get("client_id")
    client_secret = litellm_params.get("client_secret")
    azure_username = litellm_params.get("azure_username")
    azure_password = litellm_params.get("azure_password")

    azure_ad_token_provider: Optional[Callable[[], str]] = None

    # An explicit api_key has higher priority than the service-principal flow,
    # so the latter is only used when no key was supplied.
    if not api_key and tenant_id and client_id and client_secret:
        verbose_router_logger.debug("Using Azure AD Token Provider for Azure Auth")
        azure_ad_token_provider = get_azure_ad_token_from_entrata_id(
            tenant_id=tenant_id,
            client_id=client_id,
            client_secret=client_secret,
        )

    # Username/password credentials override the provider chosen above.
    if azure_username and azure_password and client_id:
        azure_ad_token_provider = get_azure_ad_token_from_username_password(
            azure_username=azure_username,
            azure_password=azure_password,
            client_id=client_id,
        )

    if azure_ad_token is not None and azure_ad_token.startswith("oidc/"):
        azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
    elif (
        not api_key
        and azure_ad_token_provider is None
        and litellm.enable_azure_ad_token_refresh is True
    ):
        # Best effort: failing to build a provider is not fatal here.
        try:
            azure_ad_token_provider = get_azure_ad_token_provider()
        except ValueError:
            verbose_router_logger.debug("Azure AD Token Provider could not be used.")

    if api_version is None:
        api_version = os.getenv("AZURE_API_VERSION", litellm.AZURE_DEFAULT_API_VERSION)

    # Never log the full key: keep at most the first 8 characters and pad
    # with a fixed run of asterisks.
    masked_api_key = api_key
    if isinstance(api_key, str):
        masked_api_key = api_key[:8] + "*" * 15
    verbose_router_logger.debug(
        f"Initializing Azure OpenAI Client for {model_name}, Api Base: {str(api_base)}, Api Key:{masked_api_key}"
    )

    azure_client_params = {
        "api_key": api_key,
        "azure_endpoint": api_base,
        "api_version": api_version,
        "azure_ad_token": azure_ad_token,
        "azure_ad_token_provider": azure_ad_token_provider,
    }

    # this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
    # required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
    return select_azure_base_url_or_endpoint(azure_client_params)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue