Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
LiteLLM Minor Fixes and Improvements (09/13/2024) (#5689)
* refactor: clean up unused variables + fix pyright errors
* feat(health_check.py): Closes https://github.com/BerriAI/litellm/issues/5686
* fix(o1_reasoning.py): add stricter check for o-1 reasoning model
* refactor(mistral/): make it easier to see mistral transformation logic
* fix(openai.py): fix openai o-1 model param mapping. Fixes https://github.com/BerriAI/litellm/issues/5685
* feat(main.py): infer finetuned gemini model from base model. Fixes https://github.com/BerriAI/litellm/issues/5678
* docs(vertex.md): update docs to call finetuned gemini models
* feat(proxy_server.py): allow admin to hide proxy model aliases. Closes https://github.com/BerriAI/litellm/issues/5692
* docs(load_balancing.md): add docs on hiding alias models from proxy config
* fix(base.py): don't raise NotImplementedError
* fix(user_api_key_auth.py): fix model max budget check
* fix(router.py): fix elif
* fix(user_api_key_auth.py): don't set team_id to empty str
* fix(team_endpoints.py): fix response type
* test(test_completion.py): handle predibase error
* test(test_proxy_server.py): fix test
* fix(o1_transformation.py): fix max_completion_token mapping
* test(test_image_generation.py): mark flaky test
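The max_completion_token fix in the list above touches a known OpenAI constraint: o1-series models do not accept max_tokens and expect max_completion_tokens instead. A minimal sketch of that kind of rename, assuming nothing about litellm's actual transformation code (map_o1_params is a hypothetical helper, not litellm's function):

    # Illustrative sketch only: o1-series models take max_completion_tokens
    # in place of max_tokens, so a transformation layer must rename the key.
    # map_o1_params is a hypothetical helper name.
    def map_o1_params(params: dict) -> dict:
        mapped = dict(params)
        if "max_tokens" in mapped:
            mapped["max_completion_tokens"] = mapped.pop("max_tokens")
        return mapped

    print(map_o1_params({"max_tokens": 256, "temperature": 1}))
    # {'temperature': 1, 'max_completion_tokens': 256}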
parent 60c5d3ebec
commit 713d762411
35 changed files with 1020 additions and 539 deletions
@@ -76,6 +76,7 @@ from litellm.types.llms.openai import (
     ChatCompletionNamedToolChoiceParam,
     ChatCompletionToolParam,
 )
+from litellm.types.utils import FileTypes  # type: ignore
 from litellm.types.utils import (
     CallTypes,
     ChatCompletionDeltaToolCall,
@@ -84,7 +85,6 @@ from litellm.types.utils import (
     Delta,
     Embedding,
     EmbeddingResponse,
-    FileTypes,
     ImageResponse,
     Message,
     ModelInfo,
@@ -2339,6 +2339,7 @@ def get_litellm_params(
     text_completion=None,
     azure_ad_token_provider=None,
     user_continue_message=None,
+    base_model=None,
 ):
     litellm_params = {
         "acompletion": acompletion,
@@ -2365,6 +2366,8 @@ def get_litellm_params(
         "text_completion": text_completion,
         "azure_ad_token_provider": azure_ad_token_provider,
         "user_continue_message": user_continue_message,
+        "base_model": base_model
+        or _get_base_model_from_litellm_call_metadata(metadata=metadata),
     }

     return litellm_params
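The two added lines make get_litellm_params prefer an explicitly passed base_model and otherwise fall back to a metadata lookup. A runnable sketch of that resolution order, using a condensed copy of the helper introduced further down in this diff (the metadata values are invented for illustration):

    from typing import Optional

    def _get_base_model_from_litellm_call_metadata(
        metadata: Optional[dict],
    ) -> Optional[str]:
        # Condensed version of the helper added later in this diff.
        if metadata is not None:
            model_info = metadata.get("model_info", {})
            if model_info is not None:
                return model_info.get("base_model", None)
        return None

    metadata = {"model_info": {"base_model": "vertex_ai/gemini-1.0-pro"}}  # hypothetical value

    # An explicit base_model wins; None falls back to the metadata lookup.
    print("my-explicit-base" or _get_base_model_from_litellm_call_metadata(metadata))
    print(None or _get_base_model_from_litellm_call_metadata(metadata))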
@@ -6063,11 +6066,11 @@ def _calculate_retry_after(
     max_retries: int,
     response_headers: Optional[httpx.Headers] = None,
     min_timeout: int = 0,
-):
+) -> Union[float, int]:
     retry_after = _get_retry_after_from_exception_header(response_headers)

     # If the API asks us to wait a certain amount of time (and it's a reasonable amount), just do what it says.
-    if 0 < retry_after <= 60:
+    if retry_after is not None and 0 < retry_after <= 60:
         return retry_after

     initial_retry_delay = 0.5
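The new guard only trusts a server-supplied Retry-After when it is a sane value, between 0 and 60 seconds; anything else falls through to client-side backoff seeded by initial_retry_delay = 0.5. A hedged sketch of that shape; only the guard and the initial delay appear in the hunk, so the doubling and jitter factors below are assumptions:

    import random
    from typing import Optional, Union

    def calculate_retry_after_sketch(
        remaining_retries: int,
        max_retries: int,
        retry_after: Optional[float],  # value already parsed from the Retry-After header
        min_timeout: int = 0,
    ) -> Union[float, int]:
        # Honor a reasonable server-provided delay, as in the guard above.
        if retry_after is not None and 0 < retry_after <= 60:
            return retry_after
        # Fallback: exponential backoff with jitter. Only initial_retry_delay = 0.5
        # is visible in the hunk; the doubling and jitter factors are assumptions.
        initial_retry_delay = 0.5
        nb_retries = max_retries - remaining_retries
        sleep_seconds = initial_retry_delay * (2 ** nb_retries)
        jitter = 1 - 0.25 * random.random()
        return max(sleep_seconds * jitter, min_timeout)

    print(calculate_retry_after_sketch(remaining_retries=1, max_retries=3, retry_after=None))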
@@ -10962,6 +10965,22 @@ def get_logging_id(start_time, response_obj):
     return None


+def _get_base_model_from_litellm_call_metadata(
+    metadata: Optional[dict],
+) -> Optional[str]:
+    if metadata is None:
+        return None
+
+    if metadata is not None:
+        model_info = metadata.get("model_info", {})
+
+        if model_info is not None:
+            base_model = model_info.get("base_model", None)
+            if base_model is not None:
+                return base_model
+    return None
+
+
 def _get_base_model_from_metadata(model_call_details=None):
     if model_call_details is None:
         return None
@@ -10970,13 +10989,7 @@ def _get_base_model_from_metadata(model_call_details=None):
     if litellm_params is not None:
         metadata = litellm_params.get("metadata", {})

-        if metadata is not None:
-            model_info = metadata.get("model_info", {})
-
-            if model_info is not None:
-                base_model = model_info.get("base_model", None)
-                if base_model is not None:
-                    return base_model
+        return _get_base_model_from_litellm_call_metadata(metadata=metadata)
     return None
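A quick usage sketch of the refactored call path: _get_base_model_from_metadata pulls litellm_params out of model_call_details and now delegates the metadata walk to _get_base_model_from_litellm_call_metadata. The nesting it expects looks like this (all values here are invented for illustration):

    # Expected nesting: model_call_details -> litellm_params -> metadata
    #                    -> model_info -> base_model. All values are hypothetical.
    model_call_details = {
        "litellm_params": {
            "metadata": {"model_info": {"base_model": "azure/gpt-4"}},
        }
    }

    litellm_params = model_call_details.get("litellm_params", {})
    metadata = litellm_params.get("metadata", {})
    # This is the walk the new helper performs on `metadata`:
    model_info = metadata.get("model_info", {}) if metadata is not None else {}
    print(model_info.get("base_model", None))  # azure/gpt-4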