diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 08fa2d954..238fe7136 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,8 +1,7 @@
 model_list:
-  - model_name: "predibase-llama"
+  - model_name: "*"
     litellm_params:
-      model: "predibase/llama-3-8b-instruct"
-      request_timeout: 1
+      model: "*"
 
-litellm_settings:
-  failure_callback: ["langfuse"]
+# litellm_settings:
+#   failure_callback: ["langfuse"]
diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py
index fe9eaaee0..65c30f10e 100644
--- a/litellm/proxy/hooks/parallel_request_limiter.py
+++ b/litellm/proxy/hooks/parallel_request_limiter.py
@@ -472,11 +472,10 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
 
     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
         try:
-            self.print_verbose(f"Inside Max Parallel Request Failure Hook")
-            global_max_parallel_requests = (
-                kwargs["litellm_params"]
-                .get("metadata", {})
-                .get("global_max_parallel_requests", None)
+            self.print_verbose("Inside Max Parallel Request Failure Hook")
+            _metadata = kwargs["litellm_params"].get("metadata", {}) or {}
+            global_max_parallel_requests = _metadata.get(
+                "global_max_parallel_requests", None
             )
             user_api_key = (
                 kwargs["litellm_params"].get("metadata", {}).get("user_api_key", None)
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 943a52a96..0f57a5fd1 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1959,6 +1959,7 @@ class ProxyConfig:
                     if len(_value) > 0:
                         _litellm_params[k] = _value
                 _litellm_params = LiteLLM_Params(**_litellm_params)
+            else:
                 verbose_proxy_logger.error(
                     f"Invalid model added to proxy db. Invalid litellm params. litellm_params={_litellm_params}"
                 )
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 72baa37f2..314370f02 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -304,7 +304,7 @@ class Message(OpenAIObject):
         content: Optional[str] = None,
         role: Literal["assistant"] = "assistant",
         function_call=None,
-        tool_calls=None,
+        tool_calls: Optional[list] = None,
         **params,
     ):
         init_values = {
@@ -322,7 +322,7 @@ class Message(OpenAIObject):
                     )
                     for tool_call in tool_calls
                 ]
-                if tool_calls is not None
+                if tool_calls is not None and len(tool_calls) > 0
                 else None
             ),
         }
@@ -445,8 +445,6 @@ class Choices(OpenAIObject):
 
 
 class Usage(OpenAIObject):
-    prompt_cache_hit_tokens: Optional[int] = Field(default=None)
-    prompt_cache_miss_tokens: Optional[int] = Field(default=None)
     prompt_tokens: Optional[int] = Field(default=None)
     completion_tokens: Optional[int] = Field(default=None)
     total_tokens: Optional[int] = Field(default=None)
@@ -456,16 +454,15 @@ class Usage(OpenAIObject):
         prompt_tokens: Optional[int] = None,
        completion_tokens: Optional[int] = None,
         total_tokens: Optional[int] = None,
-        prompt_cache_hit_tokens: Optional[int] = None,
-        prompt_cache_miss_tokens: Optional[int] = None,
+        **params,
     ):
         data = {
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
             "total_tokens": total_tokens,
-            "prompt_cache_hit_tokens": prompt_cache_hit_tokens,
-            "prompt_cache_miss_tokens": prompt_cache_miss_tokens,
+            **params,
         }
+
         super().__init__(**data)
 
     def __contains__(self, key):
diff --git a/litellm/utils.py b/litellm/utils.py
index 916d31cfb..713fe4f2a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4446,6 +4446,11 @@ def get_llm_provider(
         return model, custom_llm_provider, dynamic_api_key, api_base
 
     if custom_llm_provider:
+        if (
+            model.split("/")[0] == custom_llm_provider
+        ):  # handle scenario where model="azure/*" and custom_llm_provider="azure"
+            model = model.replace("{}/".format(custom_llm_provider), "")
+
         return model, custom_llm_provider, dynamic_api_key, api_base
 
     if api_key and api_key.startswith("os.environ/"):
@@ -5827,9 +5832,10 @@ def convert_to_model_response_object(
             model_response_object.usage.completion_tokens = response_object["usage"].get("completion_tokens", 0)  # type: ignore
             model_response_object.usage.prompt_tokens = response_object["usage"].get("prompt_tokens", 0)  # type: ignore
             model_response_object.usage.total_tokens = response_object["usage"].get("total_tokens", 0)  # type: ignore
-            model_response_object.usage.prompt_cache_hit_tokens = response_object["usage"].get("prompt_cache_hit_tokens", None)  # type: ignore
-            model_response_object.usage.prompt_cache_miss_tokens = response_object["usage"].get("prompt_cache_miss_tokens", None)  # type: ignore
-
+            special_keys = ["completion_tokens", "prompt_tokens", "total_tokens"]
+            for k, v in response_object["usage"].items():
+                if k not in special_keys:
+                    setattr(model_response_object.usage, k, v)  # type: ignore
         if "created" in response_object:
             model_response_object.created = response_object["created"] or int(
                 time.time()
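
A minimal sketch (not part of the patch) of how the reworked Usage class is expected to behave once this change is applied: provider-specific usage keys such as prompt_cache_hit_tokens are no longer hard-coded fields, but are forwarded through **params and attached to the object generically.

    # Illustrative only -- assumes this patch is applied and that the OpenAIObject
    # base class accepts extra fields, which the setattr() loop in
    # convert_to_model_response_object also relies on.
    from litellm.types.utils import Usage

    usage = Usage(
        prompt_tokens=10,
        completion_tokens=5,
        total_tokens=15,
        prompt_cache_hit_tokens=8,  # provider-specific key, passed via **params
    )
    print(usage)  # the extra key is carried through instead of being dropped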