mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00

Merge branch 'BerriAI:main' into fix/groq-custom-pricing-cost

This commit is contained in commit fcd2586909

352 changed files with 14316 additions and 6075 deletions
@@ -57,9 +57,21 @@ import litellm._service_logger  # for storing API inputs, outputs, and metadata
 import litellm.litellm_core_utils
 import litellm.litellm_core_utils.audio_utils.utils
 import litellm.litellm_core_utils.json_validation_rule
+import litellm.llms
+import litellm.llms.gemini
 from litellm.caching._internal_lru_cache import lru_cache_wrapper
 from litellm.caching.caching import DualCache
 from litellm.caching.caching_handler import CachingHandlerResponse, LLMCachingHandler
+from litellm.constants import (
+    DEFAULT_MAX_LRU_CACHE_SIZE,
+    DEFAULT_TRIM_RATIO,
+    FUNCTION_DEFINITION_TOKEN_COUNT,
+    INITIAL_RETRY_DELAY,
+    JITTER,
+    MAX_RETRY_DELAY,
+    MINIMUM_PROMPT_CACHE_TOKEN_COUNT,
+    TOOL_CHOICE_OBJECT_TOKEN_COUNT,
+)
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.core_helpers import (
@@ -207,6 +219,7 @@ from litellm.llms.base_llm.base_utils import (
 from litellm.llms.base_llm.chat.transformation import BaseConfig
 from litellm.llms.base_llm.completion.transformation import BaseTextCompletionConfig
 from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
+from litellm.llms.base_llm.files.transformation import BaseFilesConfig
 from litellm.llms.base_llm.image_variations.transformation import (
     BaseImageVariationConfig,
 )
@@ -1259,6 +1272,7 @@ def client(original_function):  # noqa: PLR0915
             logging_obj, kwargs = function_setup(
                 original_function.__name__, rules_obj, start_time, *args, **kwargs
             )
+
             kwargs["litellm_logging_obj"] = logging_obj
             ## LOAD CREDENTIALS
             load_credentials_from_list(kwargs)
@@ -1516,7 +1530,7 @@ def _select_tokenizer(
     return _select_tokenizer_helper(model=model)


-@lru_cache(maxsize=128)
+@lru_cache(maxsize=DEFAULT_MAX_LRU_CACHE_SIZE)
 def _select_tokenizer_helper(model: str) -> SelectTokenizerResponse:
     if litellm.disable_hf_tokenizer_download is True:
         return _return_openai_tokenizer(model)
@@ -2624,7 +2638,7 @@ def get_optional_params_embeddings(  # noqa: PLR0915
             non_default_params=non_default_params, optional_params={}, kwargs=kwargs
         )
         return optional_params
-    elif custom_llm_provider == "vertex_ai":
+    elif custom_llm_provider == "vertex_ai" or custom_llm_provider == "gemini":
         supported_params = get_supported_openai_params(
             model=model,
             custom_llm_provider="vertex_ai",
@@ -2839,6 +2853,7 @@ def get_optional_params(  # noqa: PLR0915
     api_version=None,
     parallel_tool_calls=None,
     drop_params=None,
+    allowed_openai_params: Optional[List[str]] = None,
     reasoning_effort=None,
     additional_drop_params=None,
     messages: Optional[List[AllMessageValues]] = None,
@@ -2924,6 +2939,7 @@ def get_optional_params(  # noqa: PLR0915
         "api_version": None,
         "parallel_tool_calls": None,
         "drop_params": None,
+        "allowed_openai_params": None,
         "additional_drop_params": None,
         "messages": None,
         "reasoning_effort": None,
@@ -2940,6 +2956,7 @@ def get_optional_params(  # noqa: PLR0915
         and k != "custom_llm_provider"
         and k != "api_version"
         and k != "drop_params"
+        and k != "allowed_openai_params"
         and k != "additional_drop_params"
         and k != "messages"
         and k in default_params
@@ -3049,6 +3066,12 @@ def get_optional_params(  # noqa: PLR0915
                 tool_function["parameters"] = new_parameters

     def _check_valid_arg(supported_params: List[str]):
+        """
+        Check if the params passed to completion() are supported by the provider
+
+        Args:
+            supported_params: List[str] - supported params from the litellm config
+        """
         verbose_logger.info(
             f"\nLiteLLM completion() model= {model}; provider = {custom_llm_provider}"
         )
@@ -3082,7 +3105,7 @@ def get_optional_params(  # noqa: PLR0915
             else:
                 raise UnsupportedParamsError(
                     status_code=500,
-                    message=f"{custom_llm_provider} does not support parameters: {unsupported_params}, for model={model}. To drop these, set `litellm.drop_params=True` or for proxy:\n\n`litellm_settings:\n drop_params: true`\n",
+                    message=f"{custom_llm_provider} does not support parameters: {list(unsupported_params.keys())}, for model={model}. To drop these, set `litellm.drop_params=True` or for proxy:\n\n`litellm_settings:\n drop_params: true`\n. \n If you want to use these params dynamically send allowed_openai_params={list(unsupported_params.keys())} in your request.",
                 )

     supported_params = get_supported_openai_params(
@@ -3092,7 +3115,14 @@ def get_optional_params(  # noqa: PLR0915
         supported_params = get_supported_openai_params(
             model=model, custom_llm_provider="openai"
         )
-    _check_valid_arg(supported_params=supported_params or [])
+
+    supported_params = supported_params or []
+    allowed_openai_params = allowed_openai_params or []
+    supported_params.extend(allowed_openai_params)
+
+    _check_valid_arg(
+        supported_params=supported_params or [],
+    )
     ## raise exception if provider doesn't support passed in param
     if custom_llm_provider == "anthropic":
         ## check if unsupported param passed in
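
Taken together with the new error message above, this lets a caller opt specific OpenAI params back in per request: anything listed in allowed_openai_params is appended to the provider's supported params before validation and then forwarded as-is. A minimal, hedged usage sketch (the model name and the extra param are illustrative assumptions, not taken from this diff):

import litellm

response = litellm.completion(
    model="vertex_ai/gemini-1.5-pro",  # illustrative model
    messages=[{"role": "user", "content": "hello"}],
    # Opt in to forwarding this param as-is, even if the provider mapping does not list it.
    allowed_openai_params=["parallel_tool_calls"],
    parallel_tool_calls=True,
)
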
@@ -3195,7 +3225,7 @@ def get_optional_params(  # noqa: PLR0915
             ),
         )
     elif custom_llm_provider == "huggingface":
-        optional_params = litellm.HuggingfaceConfig().map_openai_params(
+        optional_params = litellm.HuggingFaceChatConfig().map_openai_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
@@ -3731,6 +3761,26 @@ def get_optional_params(  # noqa: PLR0915
         if k not in default_params.keys():
             optional_params[k] = passed_params[k]
     print_verbose(f"Final returned optional params: {optional_params}")
+    optional_params = _apply_openai_param_overrides(
+        optional_params=optional_params,
+        non_default_params=non_default_params,
+        allowed_openai_params=allowed_openai_params,
+    )
     return optional_params


+def _apply_openai_param_overrides(
+    optional_params: dict, non_default_params: dict, allowed_openai_params: list
+):
+    """
+    If user passes in allowed_openai_params, apply them to optional_params
+
+    These params will get passed as is to the LLM API since the user opted in to passing them in the request
+    """
+    if allowed_openai_params:
+        for param in allowed_openai_params:
+            if param not in optional_params:
+                optional_params[param] = non_default_params.pop(param, None)
+    return optional_params
+
+
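
The override helper is small enough to exercise standalone. A self-contained sketch that mirrors the function body added above, with an illustrative call showing the pass-through behavior:

def _apply_openai_param_overrides(
    optional_params: dict, non_default_params: dict, allowed_openai_params: list
):
    # Params the user explicitly allowed are copied over verbatim if not already mapped.
    if allowed_openai_params:
        for param in allowed_openai_params:
            if param not in optional_params:
                optional_params[param] = non_default_params.pop(param, None)
    return optional_params


print(
    _apply_openai_param_overrides(
        optional_params={"temperature": 0.2},
        non_default_params={"reasoning_effort": "high"},
        allowed_openai_params=["reasoning_effort"],
    )
)
# -> {'temperature': 0.2, 'reasoning_effort': 'high'}
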
@@ -5296,15 +5346,15 @@ def _calculate_retry_after(
     if retry_after is not None and 0 < retry_after <= 60:
         return retry_after

-    initial_retry_delay = 0.5
-    max_retry_delay = 8.0
+    initial_retry_delay = INITIAL_RETRY_DELAY
+    max_retry_delay = MAX_RETRY_DELAY
     nb_retries = max_retries - remaining_retries

     # Apply exponential backoff, but not more than the max.
     sleep_seconds = min(initial_retry_delay * pow(2.0, nb_retries), max_retry_delay)

     # Apply some jitter, plus-or-minus half a second.
-    jitter = 1 - 0.25 * random.random()
+    jitter = JITTER * random.random()
     timeout = sleep_seconds * jitter
     return timeout if timeout >= min_timeout else min_timeout

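
The backoff math is unchanged apart from reading its bounds and jitter factor from litellm.constants. A self-contained sketch of the same computation; the constant values here are placeholders (the first two reuse the previously hard-coded 0.5 and 8.0, the JITTER value is an assumption), the real values live in litellm.constants:

import random

INITIAL_RETRY_DELAY = 0.5  # placeholder, the previously hard-coded value
MAX_RETRY_DELAY = 8.0      # placeholder, the previously hard-coded value
JITTER = 0.75              # assumed value, for illustration only


def sketch_retry_delay(max_retries: int, remaining_retries: int, min_timeout: float = 0.0) -> float:
    nb_retries = max_retries - remaining_retries
    # Exponential backoff, capped at the maximum delay.
    sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY)
    # Scale by a random jitter factor so concurrent clients don't retry in lockstep.
    timeout = sleep_seconds * (JITTER * random.random())
    return timeout if timeout >= min_timeout else min_timeout
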
@@ -5630,7 +5680,7 @@ def shorten_message_to_fit_limit(message, tokens_needed, model: Optional[str]):
 def trim_messages(
     messages,
     model: Optional[str] = None,
-    trim_ratio: float = 0.75,
+    trim_ratio: float = DEFAULT_TRIM_RATIO,
     return_response_tokens: bool = False,
     max_tokens=None,
 ):
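
trim_messages keeps its behavior; only the default trim_ratio now comes from litellm.constants. A hedged usage sketch (the model name is illustrative):

from litellm.utils import trim_messages

messages = [
    {"role": "system", "content": "You are a terse assistant."},
    {"role": "user", "content": "Summarize this very long transcript ..."},
]

# Trims messages to fit the model's context window; trim_ratio defaults to DEFAULT_TRIM_RATIO.
trimmed = trim_messages(messages, model="gpt-3.5-turbo")
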
@@ -5901,9 +5951,10 @@ class ModelResponseIterator:


 class ModelResponseListIterator:
-    def __init__(self, model_responses):
+    def __init__(self, model_responses, delay: Optional[float] = None):
         self.model_responses = model_responses
         self.index = 0
+        self.delay = delay

     # Sync iterator
     def __iter__(self):
@@ -5914,6 +5965,8 @@ class ModelResponseListIterator:
             raise StopIteration
         model_response = self.model_responses[self.index]
         self.index += 1
+        if self.delay:
+            time.sleep(self.delay)
         return model_response

     # Async iterator
@@ -5925,6 +5978,8 @@ class ModelResponseListIterator:
             raise StopAsyncIteration
         model_response = self.model_responses[self.index]
         self.index += 1
+        if self.delay:
+            await asyncio.sleep(self.delay)
         return model_response


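
With the new delay argument, a fixed list of responses can be replayed as a slow stream, which is handy for tests. A hedged sketch (assumes ModelResponseListIterator is importable from litellm.utils; plain strings stand in for ModelResponse chunks):

import asyncio

from litellm.utils import ModelResponseListIterator

chunks = ["chunk-1", "chunk-2", "chunk-3"]  # stand-ins for ModelResponse objects


async def replay_slow_stream():
    # Each item is yielded roughly 0.1s apart, mimicking a slow provider stream.
    async for chunk in ModelResponseListIterator(chunks, delay=0.1):
        print(chunk)


asyncio.run(replay_slow_stream())
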
@@ -6215,7 +6270,7 @@ class ProviderConfigManager:
         elif litellm.LlmProviders.REPLICATE == provider:
             return litellm.ReplicateConfig()
         elif litellm.LlmProviders.HUGGINGFACE == provider:
-            return litellm.HuggingfaceConfig()
+            return litellm.HuggingFaceChatConfig()
         elif litellm.LlmProviders.TOGETHER_AI == provider:
             return litellm.TogetherAIConfig()
         elif litellm.LlmProviders.OPENROUTER == provider:
@@ -6423,6 +6478,19 @@ class ProviderConfigManager:
             return litellm.TopazImageVariationConfig()
         return None

+    @staticmethod
+    def get_provider_files_config(
+        model: str,
+        provider: LlmProviders,
+    ) -> Optional[BaseFilesConfig]:
+        if LlmProviders.GEMINI == provider:
+            from litellm.llms.gemini.files.transformation import (
+                GoogleAIStudioFilesHandler,  # experimental approach, to reduce bloat on __init__.py
+            )
+
+            return GoogleAIStudioFilesHandler()
+        return None
+

 def get_end_user_id_for_cost_tracking(
     litellm_params: dict,
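
A hedged sketch of resolving the new files config: Gemini returns the handler shown above and every other provider falls through to None (the import path for LlmProviders is an assumption, and the model name is illustrative):

from litellm.types.utils import LlmProviders  # assumed location of the provider enum
from litellm.utils import ProviderConfigManager

files_config = ProviderConfigManager.get_provider_files_config(
    model="gemini/gemini-1.5-flash",  # illustrative model name
    provider=LlmProviders.GEMINI,
)
print(type(files_config))  # GoogleAIStudioFilesHandler for GEMINI; other providers yield None
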
@@ -6487,7 +6555,7 @@ def is_prompt_caching_valid_prompt(
             model=model,
             use_default_image_token_count=True,
         )
-        return token_count >= 1024
+        return token_count >= MINIMUM_PROMPT_CACHE_TOKEN_COUNT
     except Exception as e:
         verbose_logger.error(f"Error in is_prompt_caching_valid_prompt: {e}")
         return False