fix(key_management_endpoints.py): override metadata field value on update (#7008)

* fix(key_management_endpoints.py): override metadata field value on update

allow user to override tags

* feat(__init__.py): expose new disable_end_user_cost_tracking_prometheus_only setting

allow disabling end user cost tracking on prometheus - fixes cardinality issue

* fix(litellm_pre_call_utils.py): add key/team level enforced params

Fixes https://github.com/BerriAI/litellm/issues/6652

* fix(key_management_endpoints.py): allow user to pass in `enforced_params` as a top level param on /key/generate and /key/update

* docs(enterprise.md): add docs on enforcing required params for llm requests

* Add support of Galadriel API (#7005)

* fix(router.py): robust retry after handling

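Set the retry-after time to 0 if there is at least one healthy deployment (previously more than one was required). Handle the base case of a single deployment separately: it does not take the zero-wait shortcut.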

* test(test_router.py): fix test

* feat(bedrock/): add support for 'nova' models

also adds explicit 'converse/' route for simpler routing

* fix: fix 'supports_pdf_input'

Return whether the model supports PDF input in the get_model_info result.

* feat(converse_transformation.py): support bedrock pdf input

* docs(document_understanding.md): add document understanding to docs

* fix(litellm_pre_call_utils.py): fix linting error

* fix(init.py): fix passing of bedrock converse models

* feat(bedrock/converse): support 'response_format={"type": "json_object"}'

* fix(converse_handler.py): fix linting error

* fix(base_llm_unit_tests.py): fix test

* fix: fix test

* test: fix test

* test: fix test

* test: remove duplicate test

---------

Co-authored-by: h4n0 <4738254+h4n0@users.noreply.github.com>
This commit is contained in:
Krish Dholakia 2024-12-03 23:03:50 -08:00 committed by GitHub
parent c17872988a
commit a392bd9772
37 changed files with 1297 additions and 503 deletions

View file

@ -2940,6 +2940,7 @@ class Router:
remaining_retries=num_retries,
num_retries=num_retries,
healthy_deployments=_healthy_deployments,
all_deployments=_all_deployments,
)
await asyncio.sleep(retry_after)
@ -2972,6 +2973,7 @@ class Router:
remaining_retries=remaining_retries,
num_retries=num_retries,
healthy_deployments=_healthy_deployments,
all_deployments=_all_deployments,
)
await asyncio.sleep(_timeout)
@ -3149,6 +3151,7 @@ class Router:
remaining_retries: int,
num_retries: int,
healthy_deployments: Optional[List] = None,
all_deployments: Optional[List] = None,
) -> Union[int, float]:
"""
Calculate back-off, then retry
@ -3157,10 +3160,14 @@ class Router:
1. there are healthy deployments in the same model group
2. there are fallbacks for the completion call
"""
if (
## base case - single deployment
if all_deployments is not None and len(all_deployments) == 1:
pass
elif (
healthy_deployments is not None
and isinstance(healthy_deployments, list)
and len(healthy_deployments) > 1
and len(healthy_deployments) > 0
):
return 0
@ -3242,6 +3249,7 @@ class Router:
remaining_retries=num_retries,
num_retries=num_retries,
healthy_deployments=_healthy_deployments,
all_deployments=_all_deployments,
)
## LOGGING
@ -3276,6 +3284,7 @@ class Router:
remaining_retries=remaining_retries,
num_retries=num_retries,
healthy_deployments=_healthy_deployments,
all_deployments=_all_deployments,
)
time.sleep(_timeout)