LiteLLM Minor Fixes and Improvements (09/13/2024) (#5689)

* refactor: cleanup unused variables + fix pyright errors

* feat(health_check.py): Closes https://github.com/BerriAI/litellm/issues/5686

* fix(o1_reasoning.py): add stricter check for o1 reasoning models

* refactor(mistral/): make it easier to see mistral transformation logic

* fix(openai.py): fix openai o1 model param mapping

Fixes https://github.com/BerriAI/litellm/issues/5685
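For context: o1-series models reject max_tokens in favor of max_completion_tokens, and at launch only accepted default values for sampling params such as temperature. A minimal sketch of that translation (the exact translate/drop sets here are assumptions for illustration, not litellm's verbatim code):

def map_o1_params(params: dict) -> dict:
    # Translate max_tokens -> max_completion_tokens before calling OpenAI.
    mapped = dict(params)
    if "max_tokens" in mapped:
        mapped["max_completion_tokens"] = mapped.pop("max_tokens")
    # Drop params the o1 API rejects (assumed drop-list for this sketch).
    for unsupported in ("temperature", "top_p"):
        mapped.pop(unsupported, None)
    return mapped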

* feat(main.py): infer finetuned gemini model from base model

Fixes https://github.com/BerriAI/litellm/issues/5678
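Usage sketch for the Gemini inference (endpoint id hypothetical): the fine-tuned model is exposed as a numeric Vertex AI endpoint, and the base_model recorded under model_info is what tells litellm to route it through the Gemini transformation. This is the same metadata path read by the _get_base_model_from_litellm_call_metadata helper added in the diff below.

from litellm import completion

response = completion(
    model="vertex_ai/4965075652664360960",  # fine-tuned endpoint (hypothetical id)
    messages=[{"role": "user", "content": "hi"}],
    # assumed metadata shape, mirroring metadata["model_info"]["base_model"] in the diff
    metadata={"model_info": {"base_model": "vertex_ai/gemini-1.5-pro"}},
)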

* docs(vertex.md): update docs to call finetuned gemini models

* feat(proxy_server.py): allow admin to hide proxy model aliases

Closes https://github.com/BerriAI/litellm/issues/5692

* docs(load_balancing.md): add docs on hiding alias models from proxy config
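A sketch of the new knob at the Router level, assuming the dict-valued alias with a hidden flag mirrors the proxy config shape this commit documents (model names hypothetical):

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-4o",
            "litellm_params": {"model": "openai/gpt-4o"},
        }
    ],
    # "gpt-4" routes to "gpt-4o"; hidden=True keeps the alias out of
    # /v1/models and /model/info responses (assumed flag name, per this feature).
    model_group_alias={"gpt-4": {"model": "gpt-4o", "hidden": True}},
)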

* fix(base.py): don't raise NotImplementedError

* fix(user_api_key_auth.py): fix model max budget check

* fix(router.py): fix elif

* fix(user_api_key_auth.py): don't set team_id to empty str

* fix(team_endpoints.py): fix response type

* test(test_completion.py): handle predibase error

* test(test_proxy_server.py): fix test

* fix(o1_transformation.py): fix max_completion_tokens mapping

* test(test_image_generation.py): mark flaky test
Krish Dholakia, 2024-09-14 10:02:55 -07:00 (committed by GitHub)
parent db3af20d84, commit 60709a0753
35 changed files with 1020 additions and 539 deletions

litellm/utils.py

@@ -76,6 +76,7 @@ from litellm.types.llms.openai import (
     ChatCompletionNamedToolChoiceParam,
     ChatCompletionToolParam,
 )
+from litellm.types.utils import FileTypes  # type: ignore
 from litellm.types.utils import (
     CallTypes,
     ChatCompletionDeltaToolCall,
@@ -84,7 +85,6 @@ from litellm.types.utils import (
     Delta,
     Embedding,
     EmbeddingResponse,
-    FileTypes,
     ImageResponse,
     Message,
     ModelInfo,
@@ -2339,6 +2339,7 @@ def get_litellm_params(
     text_completion=None,
     azure_ad_token_provider=None,
     user_continue_message=None,
+    base_model=None,
 ):
     litellm_params = {
         "acompletion": acompletion,
@@ -2365,6 +2366,8 @@ def get_litellm_params(
         "text_completion": text_completion,
         "azure_ad_token_provider": azure_ad_token_provider,
         "user_continue_message": user_continue_message,
+        "base_model": base_model
+        or _get_base_model_from_litellm_call_metadata(metadata=metadata),
     }
     return litellm_params
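The "or" fallback above means a caller-supplied base_model always wins, and the metadata lookup only runs when it is None. A standalone illustration of that resolution order (helper inlined for the example; model names hypothetical):

def resolve_base_model(base_model, metadata):
    # Mirrors the diff: explicit value first, else metadata["model_info"]["base_model"].
    return base_model or ((metadata or {}).get("model_info") or {}).get("base_model")

assert resolve_base_model("gemini-1.5-pro", None) == "gemini-1.5-pro"
assert resolve_base_model(None, {"model_info": {"base_model": "gpt-4o"}}) == "gpt-4o"
assert resolve_base_model(None, {}) is None

Note that "or" also falls through on an empty string, not only on None.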
@@ -6063,11 +6066,11 @@ def _calculate_retry_after(
     max_retries: int,
     response_headers: Optional[httpx.Headers] = None,
     min_timeout: int = 0,
-):
+) -> Union[float, int]:
     retry_after = _get_retry_after_from_exception_header(response_headers)
 
     # If the API asks us to wait a certain amount of time (and it's a reasonable amount), just do what it says.
-    if 0 < retry_after <= 60:
+    if retry_after is not None and 0 < retry_after <= 60:
         return retry_after
 
     initial_retry_delay = 0.5
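The added guard matters because _get_retry_after_from_exception_header returns None when the response carries no Retry-After header, and in Python 3 an ordering comparison against None raises instead of evaluating to False:

retry_after = None  # no Retry-After header on the response
# 0 < retry_after <= 60  -> TypeError: '<' not supported between instances of 'int' and 'NoneType'
if retry_after is not None and 0 < retry_after <= 60:  # fixed form short-circuits safely
    print("honor the server-provided delay")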
@@ -10962,6 +10965,22 @@ def get_logging_id(start_time, response_obj):
         return None
 
 
+def _get_base_model_from_litellm_call_metadata(
+    metadata: Optional[dict],
+) -> Optional[str]:
+    if metadata is None:
+        return None
+
+    if metadata is not None:
+        model_info = metadata.get("model_info", {})
+        if model_info is not None:
+            base_model = model_info.get("base_model", None)
+            if base_model is not None:
+                return base_model
+    return None
+
+
 def _get_base_model_from_metadata(model_call_details=None):
     if model_call_details is None:
         return None
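Standalone, the new helper behaves like this (metadata shape is the one the proxy attaches to calls; values hypothetical):

meta = {"model_info": {"base_model": "azure/gpt-4o"}}
assert _get_base_model_from_litellm_call_metadata(metadata=meta) == "azure/gpt-4o"
assert _get_base_model_from_litellm_call_metadata(metadata=None) is None
assert _get_base_model_from_litellm_call_metadata(metadata={"model_info": {}}) is None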
@@ -10970,13 +10989,7 @@ def _get_base_model_from_metadata(model_call_details=None):
     if litellm_params is not None:
         metadata = litellm_params.get("metadata", {})
-        if metadata is not None:
-            model_info = metadata.get("model_info", {})
-            if model_info is not None:
-                base_model = model_info.get("base_model", None)
-                if base_model is not None:
-                    return base_model
+
+        return _get_base_model_from_litellm_call_metadata(metadata=metadata)
     return None