Merge pull request #5029 from BerriAI/litellm_azure_ui_fix

fix(utils.py): Fix adding azure models on ui
Krish Dholakia, 2024-08-02 22:12:19 -07:00 (committed by GitHub)
commit 5f13d2ee64
5 changed files with 23 additions and 21 deletions


```diff
@@ -1,8 +1,7 @@
 model_list:
-  - model_name: "predibase-llama"
+  - model_name: "*"
     litellm_params:
-      model: "predibase/llama-3-8b-instruct"
-      request_timeout: 1
+      model: "*"
 
-litellm_settings:
-  failure_callback: ["langfuse"]
+# litellm_settings:
+#   failure_callback: ["langfuse"]
```
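The wildcard entry above lets the proxy route any requested model name, including provider-prefixed names coming from the UI. A minimal sketch of exercising it with the OpenAI SDK, assuming a LiteLLM proxy started with this config on localhost:4000 (the base_url and api_key are placeholders):

```python
# Placeholder base_url/api_key; assumes a LiteLLM proxy running with the
# wildcard config above.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

# With model_name: "*" and model: "*", any model string is accepted and
# passed through, including provider-prefixed names like "azure/gpt-4o".
resp = client.chat.completions.create(
    model="azure/gpt-4o",
    messages=[{"role": "user", "content": "hello"}],
)
print(resp.choices[0].message.content)
```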


```diff
@@ -472,11 +472,10 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
         try:
-            self.print_verbose(f"Inside Max Parallel Request Failure Hook")
-            global_max_parallel_requests = (
-                kwargs["litellm_params"]
-                .get("metadata", {})
-                .get("global_max_parallel_requests", None)
+            self.print_verbose("Inside Max Parallel Request Failure Hook")
+            _metadata = kwargs["litellm_params"].get("metadata", {}) or {}
+            global_max_parallel_requests = _metadata.get(
+                "global_max_parallel_requests", None
             )
             user_api_key = (
                 kwargs["litellm_params"].get("metadata", {}).get("user_api_key", None)
```


```diff
@@ -1959,6 +1959,7 @@ class ProxyConfig:
                     if len(_value) > 0:
                         _litellm_params[k] = _value
             _litellm_params = LiteLLM_Params(**_litellm_params)
+        else:
             verbose_proxy_logger.error(
                 f"Invalid model added to proxy db. Invalid litellm params. litellm_params={_litellm_params}"
```


```diff
@@ -304,7 +304,7 @@ class Message(OpenAIObject):
         content: Optional[str] = None,
         role: Literal["assistant"] = "assistant",
         function_call=None,
-        tool_calls=None,
+        tool_calls: Optional[list] = None,
         **params,
     ):
         init_values = {
@@ -322,7 +322,7 @@ class Message(OpenAIObject):
                 )
                 for tool_call in tool_calls
             ]
-            if tool_calls is not None
+            if tool_calls is not None and len(tool_calls) > 0
             else None
         ),
     }
```
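The second hunk makes an empty `tool_calls` list normalize to `None` instead of serializing as `[]`. A standalone sketch of the guard (`normalize_tool_calls` is a hypothetical helper mirroring the conditional expression above):

```python
# Hypothetical helper isolating the tool_calls guard from the diff.
def normalize_tool_calls(tool_calls):
    return (
        [dict(tc) for tc in tool_calls]
        if tool_calls is not None and len(tool_calls) > 0
        else None
    )

print(normalize_tool_calls(None))                # None
print(normalize_tool_calls([]))                  # None (was [] before the fix)
print(normalize_tool_calls([{"id": "call_1"}]))  # [{'id': 'call_1'}]
```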
```diff
@@ -445,8 +445,6 @@ class Choices(OpenAIObject):
 class Usage(OpenAIObject):
-    prompt_cache_hit_tokens: Optional[int] = Field(default=None)
-    prompt_cache_miss_tokens: Optional[int] = Field(default=None)
     prompt_tokens: Optional[int] = Field(default=None)
     completion_tokens: Optional[int] = Field(default=None)
     total_tokens: Optional[int] = Field(default=None)
@@ -456,16 +454,15 @@ class Usage(OpenAIObject):
         prompt_tokens: Optional[int] = None,
         completion_tokens: Optional[int] = None,
         total_tokens: Optional[int] = None,
-        prompt_cache_hit_tokens: Optional[int] = None,
-        prompt_cache_miss_tokens: Optional[int] = None,
         **params,
     ):
         data = {
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
             "total_tokens": total_tokens,
-            "prompt_cache_hit_tokens": prompt_cache_hit_tokens,
-            "prompt_cache_miss_tokens": prompt_cache_miss_tokens,
             **params,
         }
         super().__init__(**data)

     def __contains__(self, key):
```
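Here the provider-specific cache-token fields are dropped as dedicated attributes; any such usage keys now ride along through `**params`. A plain-Python stand-in for the pydantic-based `Usage` model, showing the passthrough:

```python
# Plain-Python stand-in for the pydantic Usage model, showing how extra
# provider keys now flow through **params instead of dedicated fields.
class Usage:
    def __init__(self, prompt_tokens=None, completion_tokens=None,
                 total_tokens=None, **params):
        data = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": total_tokens,
            **params,
        }
        for k, v in data.items():
            setattr(self, k, v)

u = Usage(prompt_tokens=10, completion_tokens=5, total_tokens=15,
          prompt_cache_hit_tokens=8)  # provider-specific extra key
print(u.prompt_cache_hit_tokens)  # 8
```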


```diff
@@ -4446,6 +4446,11 @@ def get_llm_provider(
             return model, custom_llm_provider, dynamic_api_key, api_base
     if custom_llm_provider:
+        if (
+            model.split("/")[0] == custom_llm_provider
+        ):  # handle scenario where model="azure/*" and custom_llm_provider="azure"
+            model = model.replace("{}/".format(custom_llm_provider), "")
         return model, custom_llm_provider, dynamic_api_key, api_base
     if api_key and api_key.startswith("os.environ/"):
```
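This is the core of the Azure UI fix: when the UI sends a model like "azure/*" along with `custom_llm_provider="azure"`, the duplicated provider prefix is stripped before the tuple is returned. A standalone sketch of just the added logic (`strip_provider_prefix` is a hypothetical helper name):

```python
# Hypothetical helper isolating the added prefix handling from the diff.
def strip_provider_prefix(model: str, custom_llm_provider: str) -> str:
    if model.split("/")[0] == custom_llm_provider:
        model = model.replace("{}/".format(custom_llm_provider), "")
    return model

print(strip_provider_prefix("azure/*", "azure"))       # "*"
print(strip_provider_prefix("azure/gpt-4o", "azure"))  # "gpt-4o"
print(strip_provider_prefix("gpt-4o", "azure"))        # "gpt-4o" (unchanged)
```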
```diff
@@ -5827,9 +5832,10 @@ def convert_to_model_response_object(
             model_response_object.usage.completion_tokens = response_object["usage"].get("completion_tokens", 0)  # type: ignore
             model_response_object.usage.prompt_tokens = response_object["usage"].get("prompt_tokens", 0)  # type: ignore
             model_response_object.usage.total_tokens = response_object["usage"].get("total_tokens", 0)  # type: ignore
-            model_response_object.usage.prompt_cache_hit_tokens = response_object["usage"].get("prompt_cache_hit_tokens", None)  # type: ignore
-            model_response_object.usage.prompt_cache_miss_tokens = response_object["usage"].get("prompt_cache_miss_tokens", None)  # type: ignore
+            special_keys = ["completion_tokens", "prompt_tokens", "total_tokens"]
+            for k, v in response_object["usage"].items():
+                if k not in special_keys:
+                    setattr(model_response_object.usage, k, v)  # type: ignore
         if "created" in response_object:
             model_response_object.created = response_object["created"] or int(
                 time.time()
```
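Instead of hard-coding each provider-specific usage field, the new loop copies every non-standard key from the provider's usage dict onto the usage object. A self-contained sketch (`UsageObj` is a hypothetical stand-in for the real pydantic usage instance):

```python
class UsageObj:
    """Hypothetical stand-in for the pydantic Usage instance."""

usage = UsageObj()
response_usage = {
    "prompt_tokens": 10,
    "completion_tokens": 5,
    "total_tokens": 15,
    "prompt_cache_hit_tokens": 8,  # provider-specific key
}

# Mirrors the loop in the diff: the standard token counts are assigned
# explicitly above it; everything else is copied generically.
special_keys = ["completion_tokens", "prompt_tokens", "total_tokens"]
for k, v in response_usage.items():
    if k not in special_keys:
        setattr(usage, k, v)

print(usage.prompt_cache_hit_tokens)  # 8
```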