mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
fix(key_management_endpoints.py): override metadata field value on update (#7008)
* fix(key_management_endpoints.py): override metadata field value on update allow user to override tags
* feat(__init__.py): expose new disable_end_user_cost_tracking_prometheus_only metric allow disabling end user cost tracking on prometheus - fixes cardinality issue
* fix(litellm_pre_call_utils.py): add key/team level enforced params Fixes https://github.com/BerriAI/litellm/issues/6652
* fix(key_management_endpoints.py): allow user to pass in `enforced_params` as a top level param on /key/generate and /key/update
* docs(enterprise.md): add docs on enforcing required params for llm requests
* Add support of Galadriel API (#7005)
* fix(router.py): robust retry after handling set retry after time to 0 if >0 healthy deployments. handle base case = 1 deployment
* test(test_router.py): fix test
* feat(bedrock/): add support for 'nova' models also adds explicit 'converse/' route for simpler routing
* fix: fix 'supports_pdf_input' return if model supports pdf input on get_model_info
* feat(converse_transformation.py): support bedrock pdf input
* docs(document_understanding.md): add document understanding to docs
* fix(litellm_pre_call_utils.py): fix linting error
* fix(init.py): fix passing of bedrock converse models
* feat(bedrock/converse): support 'response_format={"type": "json_object"}'
* fix(converse_handler.py): fix linting error
* fix(base_llm_unit_tests.py): fix test
* fix: fix test
* test: fix test
* test: fix test
* test: remove duplicate test
---------
Co-authored-by: h4n0 <4738254+h4n0@users.noreply.github.com>
This commit is contained in:
parent
d558b643be
commit
6bb934c0ac
37 changed files with 1297 additions and 503 deletions
|
@ -2940,6 +2940,7 @@ class Router:
|
|||
remaining_retries=num_retries,
|
||||
num_retries=num_retries,
|
||||
healthy_deployments=_healthy_deployments,
|
||||
all_deployments=_all_deployments,
|
||||
)
|
||||
|
||||
await asyncio.sleep(retry_after)
|
||||
|
@ -2972,6 +2973,7 @@ class Router:
|
|||
remaining_retries=remaining_retries,
|
||||
num_retries=num_retries,
|
||||
healthy_deployments=_healthy_deployments,
|
||||
all_deployments=_all_deployments,
|
||||
)
|
||||
await asyncio.sleep(_timeout)
|
||||
|
||||
|
@ -3149,6 +3151,7 @@ class Router:
|
|||
remaining_retries: int,
|
||||
num_retries: int,
|
||||
healthy_deployments: Optional[List] = None,
|
||||
all_deployments: Optional[List] = None,
|
||||
) -> Union[int, float]:
|
||||
"""
|
||||
Calculate back-off, then retry
|
||||
|
@ -3157,10 +3160,14 @@ class Router:
|
|||
1. there are healthy deployments in the same model group
|
||||
2. there are fallbacks for the completion call
|
||||
"""
|
||||
if (
|
||||
|
||||
## base case - single deployment
|
||||
if all_deployments is not None and len(all_deployments) == 1:
|
||||
pass
|
||||
elif (
|
||||
healthy_deployments is not None
|
||||
and isinstance(healthy_deployments, list)
|
||||
and len(healthy_deployments) > 1
|
||||
and len(healthy_deployments) > 0
|
||||
):
|
||||
return 0
|
||||
|
||||
|
@ -3242,6 +3249,7 @@ class Router:
|
|||
remaining_retries=num_retries,
|
||||
num_retries=num_retries,
|
||||
healthy_deployments=_healthy_deployments,
|
||||
all_deployments=_all_deployments,
|
||||
)
|
||||
|
||||
## LOGGING
|
||||
|
@ -3276,6 +3284,7 @@ class Router:
|
|||
remaining_retries=remaining_retries,
|
||||
num_retries=num_retries,
|
||||
healthy_deployments=_healthy_deployments,
|
||||
all_deployments=_all_deployments,
|
||||
)
|
||||
time.sleep(_timeout)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue