forked from phoenix/litellm-mirror
(refactor) router - use static methods for client init utils (#6420)
* use InitalizeOpenAISDKClient
* use InitalizeOpenAISDKClient static method
* fix # noqa: PLR0915
This commit is contained in:
parent cdda7c243f
commit 17e81d861c
2 changed files with 448 additions and 426 deletions
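The diff below boils down to one pattern: the module-level client-initialization helpers in client_initalization_utils.py become static methods on a single InitalizeOpenAISDKClient class, and the Router calls them through that class. A minimal sketch of the call-site change, using names taken from the diff (the wrapper function here is illustrative, not part of the commit):

    # Before: free functions imported individually
    # from litellm.router_utils.client_initalization_utils import (
    #     set_client,
    #     should_initialize_sync_client,
    # )
    # set_client(litellm_router_instance=self, model=deployment.to_json(exclude_none=True))

    # After: one import; the helpers are invoked as static methods
    from litellm.router_utils.client_initalization_utils import InitalizeOpenAISDKClient

    def init_deployment_clients(router, deployment):
        # Illustrative wrapper: builds and caches the OpenAI/Azure SDK clients
        # for a single deployment, as Router does in the hunks below.
        InitalizeOpenAISDKClient.set_client(
            litellm_router_instance=router,
            model=deployment.to_json(exclude_none=True),
        )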
litellm/router.py

@@ -63,10 +63,7 @@ from litellm.router_utils.batch_utils import (
     _get_router_metadata_variable_name,
     replace_model_in_jsonl,
 )
-from litellm.router_utils.client_initalization_utils import (
-    set_client,
-    should_initialize_sync_client,
-)
+from litellm.router_utils.client_initalization_utils import InitalizeOpenAISDKClient
 from litellm.router_utils.cooldown_cache import CooldownCache
 from litellm.router_utils.cooldown_callbacks import router_cooldown_event_callback
 from litellm.router_utils.cooldown_handlers import (

@@ -3951,7 +3948,7 @@ class Router:
            raise Exception(f"Unsupported provider - {custom_llm_provider}")

        # init OpenAI, Azure clients
-        set_client(
+        InitalizeOpenAISDKClient.set_client(
            litellm_router_instance=self, model=deployment.to_json(exclude_none=True)
        )

@@ -4661,7 +4658,9 @@ class Router:
                """
                Re-initialize the client
                """
-                set_client(litellm_router_instance=self, model=deployment)
+                InitalizeOpenAISDKClient.set_client(
+                    litellm_router_instance=self, model=deployment
+                )
                client = self.cache.get_cache(key=cache_key, local_only=True)
                return client
            else:

@@ -4671,7 +4670,9 @@ class Router:
                """
                Re-initialize the client
                """
-                set_client(litellm_router_instance=self, model=deployment)
+                InitalizeOpenAISDKClient.set_client(
+                    litellm_router_instance=self, model=deployment
+                )
                client = self.cache.get_cache(key=cache_key, local_only=True)
                return client
            else:

@@ -4682,7 +4683,9 @@ class Router:
                """
                Re-initialize the client
                """
-                set_client(litellm_router_instance=self, model=deployment)
+                InitalizeOpenAISDKClient.set_client(
+                    litellm_router_instance=self, model=deployment
+                )
                client = self.cache.get_cache(key=cache_key)
                return client
            else:

@@ -4692,7 +4695,9 @@ class Router:
                """
                Re-initialize the client
                """
-                set_client(litellm_router_instance=self, model=deployment)
+                InitalizeOpenAISDKClient.set_client(
+                    litellm_router_instance=self, model=deployment
+                )
                client = self.cache.get_cache(key=cache_key)
                return client

litellm/router_utils/client_initalization_utils.py

@@ -23,236 +23,227 @@ else:
     LitellmRouter = Any


-def should_initialize_sync_client(
-    litellm_router_instance: LitellmRouter,
-) -> bool:
-    """
-    Returns if Sync OpenAI, Azure Clients should be initialized.
-
-    Do not init sync clients when router.router_general_settings.async_only_mode is True
-
-    """
-    if litellm_router_instance is None:
-        return False
-
-    if litellm_router_instance.router_general_settings is not None:
-        if (
-            hasattr(litellm_router_instance, "router_general_settings")
-            and hasattr(
-                litellm_router_instance.router_general_settings, "async_only_mode"
-            )
-            and litellm_router_instance.router_general_settings.async_only_mode is True
-        ):
-            return False
-
-    return True
-
-
-def set_client(litellm_router_instance: LitellmRouter, model: dict):  # noqa: PLR0915
-    """
-    - Initializes Azure/OpenAI clients. Stores them in cache, b/c of this - https://github.com/BerriAI/litellm/issues/1278
-    - Initializes Semaphore for client w/ rpm. Stores them in cache. b/c of this - https://github.com/BerriAI/litellm/issues/2994
-    """
+class InitalizeOpenAISDKClient:
+    @staticmethod
+    def should_initialize_sync_client(
+        litellm_router_instance: LitellmRouter,
+    ) -> bool:
+        """
+        Returns if Sync OpenAI, Azure Clients should be initialized.
+
+        Do not init sync clients when router.router_general_settings.async_only_mode is True
+
+        """
+        if litellm_router_instance is None:
+            return False
+
+        if litellm_router_instance.router_general_settings is not None:
+            if (
+                hasattr(litellm_router_instance, "router_general_settings")
+                and hasattr(
+                    litellm_router_instance.router_general_settings, "async_only_mode"
+                )
+                and litellm_router_instance.router_general_settings.async_only_mode
+                is True
+            ):
+                return False
+
+        return True
+
+    @staticmethod
+    def set_client(  # noqa: PLR0915
+        litellm_router_instance: LitellmRouter, model: dict
+    ):
+        """
+        - Initializes Azure/OpenAI clients. Stores them in cache, b/c of this - https://github.com/BerriAI/litellm/issues/1278
+        - Initializes Semaphore for client w/ rpm. Stores them in cache. b/c of this - https://github.com/BerriAI/litellm/issues/2994
+        """

[The body of set_client is re-indented one level to sit inside the class; its logic is unchanged. It reads client_ttl, litellm_params, model_name and model_id, creates an asyncio.Semaphore when rpm/tpm/max_parallel_requests yield a calculated_max_parallel_requests value and caches it under f"{model_id}_max_parallel_requests_client", resolves custom_llm_provider and the default api_key/api_base for openai-compatible providers via litellm.get_llm_provider, resolves "os.environ/"-prefixed api_key, api_base, api_version, timeout, stream_timeout, max_retries and organization through get_secret_str / get_secret, normalizes Azure AI Studio api_base values to end in "/v1/", and wires up the Azure AD token provider. Besides the indentation, the call sites inside the body now go through the class's own static methods:]

-        azure_ad_token_provider = get_azure_ad_token_from_entrata_id(
-            tenant_id=litellm_params.get("tenant_id"),
-            client_id=litellm_params.get("client_id"),
-            client_secret=litellm_params.get("client_secret"),
-        )
+            azure_ad_token_provider = (
+                InitalizeOpenAISDKClient.get_azure_ad_token_from_entrata_id(
+                    tenant_id=litellm_params.get("tenant_id"),
+                    client_id=litellm_params.get("client_id"),
+                    client_secret=litellm_params.get("client_secret"),
+                )
+            )

-        if should_initialize_sync_client(
-            litellm_router_instance=litellm_router_instance
-        ):
+            if InitalizeOpenAISDKClient.should_initialize_sync_client(
+                litellm_router_instance=litellm_router_instance
+            ):

@@ -260,7 +251,7 @@ def set_client(litellm_router_instance: LitellmRouter, model: dict):  # noqa: PLR0915
@@ -273,35 +264,35 @@
@@ -309,7 +300,7 @@
@@ -322,41 +313,159 @@
@@ -370,14 +479,17 @@
@@ -394,16 +506,18 @@
@@ -412,20 +526,23 @@
@@ -434,149 +551,49 @@

[These hunks carry the same re-indentation and call-site substitution through the rest of set_client: every "if should_initialize_sync_client(litellm_router_instance=litellm_router_instance):" becomes "if InitalizeOpenAISDKClient.should_initialize_sync_client(...):". The client construction itself is unchanged: Azure deployments still get openai.AsyncAzureOpenAI / openai.AzureOpenAI clients built from azure_client_params (after select_azure_base_url_or_endpoint), OpenAI-compatible deployments still get openai.AsyncOpenAI / openai.OpenAI clients with api_key, base_url and organization, and each client is cached via litellm_router_instance.cache.set_cache under f"{model_id}_async_client", f"{model_id}_client", f"{model_id}_stream_async_client" or f"{model_id}_stream_client" with ttl=client_ttl and local_only=True, using httpx.AsyncClient / httpx.Client with connection limits and litellm.ssl_verify. The final hunk moves get_azure_ad_token_from_entrata_id into the class:]

-def get_azure_ad_token_from_entrata_id(
-    tenant_id: str, client_id: str, client_secret: str
-) -> Callable[[], str]:
+    @staticmethod
+    def get_azure_ad_token_from_entrata_id(
+        tenant_id: str, client_id: str, client_secret: str
+    ) -> Callable[[], str]:

[Its body is re-indented but otherwise identical: it imports ClientSecretCredential, DefaultAzureCredential and get_bearer_token_provider from azure.identity, resolves "os.environ/"-prefixed tenant_id, client_id and client_secret via get_secret_str, raises ValueError("tenant_id, client_id, and client_secret must be provided") if any is missing, builds a ClientSecretCredential and returns get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default").]
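The should_initialize_sync_client guard kept in this refactor means a router configured for async-only traffic never builds the synchronous httpx-backed SDK clients. A short usage sketch, assuming the RouterGeneralSettings model and constructor argument that the attribute names in the diff imply (the model_list entry is illustrative):

    from litellm import Router
    from litellm.types.router import RouterGeneralSettings  # assumed import path

    router = Router(
        model_list=[
            {
                "model_name": "gpt-4o",
                "litellm_params": {
                    "model": "openai/gpt-4o",
                    # "os.environ/..." values are resolved via get_secret_str in set_client
                    "api_key": "os.environ/OPENAI_API_KEY",
                    "rpm": 100,  # triggers the per-deployment asyncio.Semaphore in set_client
                },
            }
        ],
        router_general_settings=RouterGeneralSettings(async_only_mode=True),
    )

    # With async_only_mode=True, InitalizeOpenAISDKClient.should_initialize_sync_client(
    # litellm_router_instance=router) returns False, so only the AsyncOpenAI /
    # AsyncAzureOpenAI clients are created and cached (under f"{model_id}_async_client"
    # and f"{model_id}_stream_async_client"); the sync clients are skipped.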