forked from phoenix/litellm-mirror

feat(router.py): allow setting model_region in litellm_params

Closes https://github.com/BerriAI/litellm/issues/3580

parent c12af219af · commit ebc927f1c8

5 changed files with 71 additions and 25 deletions
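The feature in one picture: a deployment can now declare its region directly via `region_name` in `litellm_params`, instead of relying on env-var-driven auto-inference at startup. A minimal sketch based on the test added in this commit (credentials via environment variables are assumed):

    import os

    from litellm import Router

    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                    "region_name": "eu",  # explicit region, no inference needed
                },
            },
        ],
        enable_pre_call_checks=True,  # lets allowed_model_region filter deployments
    )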
@@ -101,6 +101,9 @@ blocked_user_list: Optional[Union[str, List]] = None
 banned_keywords_list: Optional[Union[str, List]] = None
 llm_guard_mode: Literal["all", "key-specific", "request-specific"] = "all"
 ##################
+### PREVIEW FEATURES ###
+enable_preview_features: bool = False
+##################
 logging: bool = True
 caching: bool = (
     False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
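The new module-level flag defaults to off. In code it can be flipped directly; a one-line sketch (the YAML equivalent, `enable_preview_features: true` under `litellm_settings`, appears in the proxy config hunk below):

    import litellm

    # opt in to preview behavior such as startup region auto-inference
    litellm.enable_preview_features = True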
@@ -1,25 +1,13 @@
 model_list:
 - litellm_params:
-    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-    api_key: my-fake-key
-    model: openai/my-fake-model
-  model_name: fake-openai-endpoint
-- litellm_params:
-    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-    api_key: my-fake-key-2
-    model: openai/my-fake-model-2
-  model_name: fake-openai-endpoint
-- litellm_params:
-    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-    api_key: my-fake-key-3
-    model: openai/my-fake-model-3
-  model_name: fake-openai-endpoint
-- model_name: gpt-4
-  litellm_params:
-    model: gpt-3.5-turbo
-- litellm_params:
-    model: together_ai/codellama/CodeLlama-13b-Instruct-hf
-  model_name: CodeLlama-13b-Instruct
+    api_base: os.environ/AZURE_API_BASE
+    api_key: os.environ/AZURE_API_KEY
+    api_version: 2023-07-01-preview
+    model: azure/azure-embedding-model
+  model_info:
+    base_model: text-embedding-ada-002
+    mode: embedding
+  model_name: text-embedding-ada-002
 
 router_settings:
   redis_host: redis
@@ -28,6 +16,7 @@ router_settings:
 
 litellm_settings:
   set_verbose: True
+  enable_preview_features: true
   # service_callback: ["prometheus_system"]
   # success_callback: ["prometheus"]
   # failure_callback: ["prometheus"]
@@ -2557,23 +2557,25 @@ class Router:
         # init OpenAI, Azure clients
         self.set_client(model=deployment.to_json(exclude_none=True))
 
-        # set region (if azure model)
-        _auto_infer_region = os.environ.get("AUTO_INFER_REGION", False)
-        if _auto_infer_region == True or _auto_infer_region == "True":
+        # set region (if azure model) ## PREVIEW FEATURE ##
+        if litellm.enable_preview_features == True:
             print("Auto inferring region")  # noqa
             """
             Hiding behind a feature flag
             When there is a large amount of LLM deployments this makes startup times blow up
             """
             try:
-                if "azure" in deployment.litellm_params.model:
+                if (
+                    "azure" in deployment.litellm_params.model
+                    and deployment.litellm_params.region_name is None
+                ):
                     region = litellm.utils.get_model_region(
                         litellm_params=deployment.litellm_params, mode=None
                     )
 
                     deployment.litellm_params.region_name = region
             except Exception as e:
-                verbose_router_logger.error(
+                verbose_router_logger.debug(
                     "Unable to get the region for azure model - {}, {}".format(
                         deployment.litellm_params.model, str(e)
                     )
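Two behavioral changes fall out of this hunk: inference now runs only when the preview flag is on, and an explicitly set `region_name` is never overwritten. A hedged sketch of the opt-in path for a deployment that omits `region_name` (hypothetical credentials via environment variables):

    import os

    import litellm
    from litellm import Router

    litellm.enable_preview_features = True  # gate for startup region auto-inference

    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                    # no region_name: litellm.utils.get_model_region() fills it
                    # in at startup; failures now log at debug, not error
                },
            },
        ],
    )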
@@ -687,6 +687,55 @@ def test_router_context_window_check_pre_call_check_out_group():
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
 
 
+@pytest.mark.parametrize("allowed_model_region", ["eu", None])
+def test_router_region_pre_call_check(allowed_model_region):
+    """
+    If region based routing set
+    - check if only model in allowed region is allowed by '_pre_call_checks'
+    """
+    model_list = [
+        {
+            "model_name": "gpt-3.5-turbo",  # openai model name
+            "litellm_params": {  # params for litellm completion/embedding call
+                "model": "azure/chatgpt-v-2",
+                "api_key": os.getenv("AZURE_API_KEY"),
+                "api_version": os.getenv("AZURE_API_VERSION"),
+                "api_base": os.getenv("AZURE_API_BASE"),
+                "base_model": "azure/gpt-35-turbo",
+                "region_name": "eu",
+            },
+            "model_info": {"id": "1"},
+        },
+        {
+            "model_name": "gpt-3.5-turbo-large",  # openai model name
+            "litellm_params": {  # params for litellm completion/embedding call
+                "model": "gpt-3.5-turbo-1106",
+                "api_key": os.getenv("OPENAI_API_KEY"),
+            },
+            "model_info": {"id": "2"},
+        },
+    ]
+
+    router = Router(model_list=model_list, enable_pre_call_checks=True)
+
+    _healthy_deployments = router._pre_call_checks(
+        model="gpt-3.5-turbo",
+        healthy_deployments=model_list,
+        messages=[{"role": "user", "content": "Hey!"}],
+        allowed_model_region=allowed_model_region,
+    )
+
+    if allowed_model_region is None:
+        assert len(_healthy_deployments) == 2
+    else:
+        assert len(_healthy_deployments) == 1, "No models selected as healthy"
+        assert (
+            _healthy_deployments[0]["model_info"]["id"] == "1"
+        ), "Incorrect model id picked. Got id={}, expected id=1".format(
+            _healthy_deployments[0]["model_info"]["id"]
+        )
+
+
 ### FUNCTION CALLING
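The parametrized case can be exercised on its own with `pytest -k test_router_region_pre_call_check`: with `allowed_model_region=None` both deployments survive the pre-call filter, while with `"eu"` only the deployment whose `region_name` matches is kept.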
@@ -5881,6 +5881,9 @@ def calculate_max_parallel_requests(
 
 
 def _is_region_eu(model_region: str) -> bool:
+    if model_region == "eu":
+        return True
+
     EU_Regions = ["europe", "sweden", "switzerland", "france", "uk"]
     for region in EU_Regions:
         if "europe" in model_region.lower():
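The added fast path lets the literal `"eu"` tag (the value used for `region_name` in the config and test above) match without string scanning; other region names still go through the loop, whose visible condition only ever tests for the `"europe"` substring. A small behavioral sketch, assuming the helper is importable from `litellm.utils` (alongside `calculate_max_parallel_requests`) and that the loop body, cut off in the hunk, returns True on a match:

    from litellm.utils import _is_region_eu  # assumed import path

    assert _is_region_eu("eu") is True          # new fast path for the plain "eu" tag
    assert _is_region_eu("westeurope") is True  # "europe" substring found in the loop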