forked from phoenix/litellm-mirror

Merge pull request #5029 from BerriAI/litellm_azure_ui_fix
fix(utils.py): Fix adding azure models on ui

commit 5f13d2ee64

5 changed files with 23 additions and 21 deletions
@@ -1,8 +1,7 @@
 model_list:
-  - model_name: "predibase-llama"
+  - model_name: "*"
     litellm_params:
-      model: "predibase/llama-3-8b-instruct"
-      request_timeout: 1
+      model: "*"
 
-litellm_settings:
-  failure_callback: ["langfuse"]
+# litellm_settings:
+#   failure_callback: ["langfuse"]
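Note: the config now has a single wildcard entry, so the proxy forwards whatever model name a request asks for instead of needing one entry per deployment. A minimal usage sketch, assuming a proxy started from this config on its default port with a placeholder key (both values are illustrative, not from this PR):

    # Any model string is routed via the wildcard entry; "gpt-3.5-turbo"
    # here is just an example request, not something this config pins.
    import openai

    client = openai.OpenAI(
        api_key="sk-1234",               # placeholder proxy key
        base_url="http://0.0.0.0:4000",  # placeholder proxy address
    )

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
    )
    print(response.choices[0].message.content)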
@@ -472,11 +472,10 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
 
     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
         try:
-            self.print_verbose(f"Inside Max Parallel Request Failure Hook")
-            global_max_parallel_requests = (
-                kwargs["litellm_params"]
-                .get("metadata", {})
-                .get("global_max_parallel_requests", None)
+            self.print_verbose("Inside Max Parallel Request Failure Hook")
+            _metadata = kwargs["litellm_params"].get("metadata", {}) or {}
+            global_max_parallel_requests = _metadata.get(
+                "global_max_parallel_requests", None
             )
             user_api_key = (
                 kwargs["litellm_params"].get("metadata", {}).get("user_api_key", None)
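The rewritten hook reads the metadata dict once and guards against it being present but explicitly None; `.get("metadata", {})` only substitutes the default when the key is missing. A standalone sketch of why the trailing `or {}` matters:

    # dict.get() returns the stored None, not the default, when the key exists.
    kwargs = {"litellm_params": {"metadata": None}}

    unsafe = kwargs["litellm_params"].get("metadata", {})            # -> None
    _metadata = kwargs["litellm_params"].get("metadata", {}) or {}   # -> {}

    # The old chained .get() would raise AttributeError on None here;
    # the new form just yields the default.
    print(_metadata.get("global_max_parallel_requests", None))       # -> None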
@@ -1959,6 +1959,7 @@ class ProxyConfig:
                         if len(_value) > 0:
                             _litellm_params[k] = _value
                     _litellm_params = LiteLLM_Params(**_litellm_params)
+
                 else:
                     verbose_proxy_logger.error(
                         f"Invalid model added to proxy db. Invalid litellm params. litellm_params={_litellm_params}"
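For context, the surrounding block copies only non-empty values into `_litellm_params` before validating them as `LiteLLM_Params`, and logs rather than raises when the result is invalid. A rough sketch of that filter-then-validate pattern, with a stand-in pydantic model since `LiteLLM_Params` itself isn't shown in this diff:

    from typing import Optional
    from pydantic import BaseModel


    class StandInParams(BaseModel):  # stand-in; the real LiteLLM_Params has many more fields
        model: str
        api_base: Optional[str] = None


    raw = {"model": "azure/gpt-4o", "api_base": ""}

    filtered = {}
    for k, _value in raw.items():
        if isinstance(_value, str) and len(_value) == 0:
            continue  # mirrors the `if len(_value) > 0` check: empty strings are dropped
        filtered[k] = _value

    params = StandInParams(**filtered)  # raises ValidationError if required fields are missing
    print(params)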
@@ -304,7 +304,7 @@ class Message(OpenAIObject):
         content: Optional[str] = None,
         role: Literal["assistant"] = "assistant",
         function_call=None,
-        tool_calls=None,
+        tool_calls: Optional[list] = None,
         **params,
     ):
         init_values = {
@@ -322,7 +322,7 @@ class Message(OpenAIObject):
                     )
                     for tool_call in tool_calls
                 ]
-                if tool_calls is not None
+                if tool_calls is not None and len(tool_calls) > 0
                 else None
             ),
         }
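Taken together, the two `Message` changes type the `tool_calls` argument and make an empty list collapse to `None` instead of surviving as `[]`. A sketch of the resulting behaviour, reduced to the conditional expression in the diff (the real constructor also wraps each entry in a typed tool-call object):

    from typing import Optional


    def normalize_tool_calls(tool_calls: Optional[list]) -> Optional[list]:
        # Same guard as the diff: only a non-empty list is kept.
        return (
            [dict(tc) for tc in tool_calls]
            if tool_calls is not None and len(tool_calls) > 0
            else None
        )


    print(normalize_tool_calls(None))                # None
    print(normalize_tool_calls([]))                  # None (previously this came back as [])
    print(normalize_tool_calls([{"id": "call_1"}]))  # [{'id': 'call_1'}]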
@@ -445,8 +445,6 @@ class Choices(OpenAIObject):
 
 
 class Usage(OpenAIObject):
-    prompt_cache_hit_tokens: Optional[int] = Field(default=None)
-    prompt_cache_miss_tokens: Optional[int] = Field(default=None)
     prompt_tokens: Optional[int] = Field(default=None)
     completion_tokens: Optional[int] = Field(default=None)
     total_tokens: Optional[int] = Field(default=None)
@@ -456,16 +454,15 @@ class Usage(OpenAIObject):
         prompt_tokens: Optional[int] = None,
         completion_tokens: Optional[int] = None,
         total_tokens: Optional[int] = None,
-        prompt_cache_hit_tokens: Optional[int] = None,
-        prompt_cache_miss_tokens: Optional[int] = None,
+        **params,
     ):
         data = {
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
             "total_tokens": total_tokens,
-            "prompt_cache_hit_tokens": prompt_cache_hit_tokens,
-            "prompt_cache_miss_tokens": prompt_cache_miss_tokens,
+            **params,
         }
+
         super().__init__(**data)
 
     def __contains__(self, key):
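With the dedicated cache-token fields removed, extra provider-specific counters now travel through `**params` into the data handed to the base class. A rough stand-in for the reworked `__init__`, using a plain pydantic v2 model with `extra="allow"` in place of `OpenAIObject` (an assumption made only so the sketch runs on its own):

    from typing import Optional
    from pydantic import BaseModel, ConfigDict


    class UsageSketch(BaseModel):
        model_config = ConfigDict(extra="allow")  # keep unknown counters instead of rejecting them

        prompt_tokens: Optional[int] = None
        completion_tokens: Optional[int] = None
        total_tokens: Optional[int] = None

        def __init__(
            self,
            prompt_tokens: Optional[int] = None,
            completion_tokens: Optional[int] = None,
            total_tokens: Optional[int] = None,
            **params,
        ):
            data = {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": total_tokens,
                **params,
            }
            super().__init__(**data)


    usage = UsageSketch(prompt_tokens=10, completion_tokens=5, total_tokens=15,
                        prompt_cache_hit_tokens=8)
    print(usage.total_tokens)             # 15
    print(usage.prompt_cache_hit_tokens)  # 8, retained as an extra field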
@@ -4446,6 +4446,11 @@ def get_llm_provider(
             return model, custom_llm_provider, dynamic_api_key, api_base
 
         if custom_llm_provider:
+            if (
+                model.split("/")[0] == custom_llm_provider
+            ):  # handle scenario where model="azure/*" and custom_llm_provider="azure"
+                model = model.replace("{}/".format(custom_llm_provider), "")
+
             return model, custom_llm_provider, dynamic_api_key, api_base
 
         if api_key and api_key.startswith("os.environ/"):
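This is the change the PR title refers to: per the inline comment, a model added from the UI can arrive as "azure/*" while `custom_llm_provider` is already "azure", so the provider prefix has to be stripped before returning. The branch in isolation (a sketch; `get_llm_provider` does much more around it):

    def strip_provider_prefix(model: str, custom_llm_provider: str) -> str:
        if (
            model.split("/")[0] == custom_llm_provider
        ):  # e.g. model="azure/*" with custom_llm_provider="azure"
            model = model.replace("{}/".format(custom_llm_provider), "")
        return model


    print(strip_provider_prefix("azure/*", "azure"))       # "*"
    print(strip_provider_prefix("azure/gpt-4o", "azure"))  # "gpt-4o"
    print(strip_provider_prefix("gpt-4o", "openai"))       # unchanged: "gpt-4o"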
@@ -5827,9 +5832,10 @@ def convert_to_model_response_object(
                 model_response_object.usage.completion_tokens = response_object["usage"].get("completion_tokens", 0)  # type: ignore
                 model_response_object.usage.prompt_tokens = response_object["usage"].get("prompt_tokens", 0)  # type: ignore
                 model_response_object.usage.total_tokens = response_object["usage"].get("total_tokens", 0)  # type: ignore
-                model_response_object.usage.prompt_cache_hit_tokens = response_object["usage"].get("prompt_cache_hit_tokens", None)  # type: ignore
-                model_response_object.usage.prompt_cache_miss_tokens = response_object["usage"].get("prompt_cache_miss_tokens", None)  # type: ignore
-
+                special_keys = ["completion_tokens", "prompt_tokens", "total_tokens"]
+                for k, v in response_object["usage"].items():
+                    if k not in special_keys:
+                        setattr(model_response_object.usage, k, v)  # type: ignore
             if "created" in response_object:
                 model_response_object.created = response_object["created"] or int(
                     time.time()
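Rather than naming each cache counter, the converter now copies every usage key outside the three standard counters onto the usage object, so provider-specific fields pass through without further code changes. A sketch of the loop, with a `SimpleNamespace` standing in for the real usage object:

    from types import SimpleNamespace

    provider_usage = {
        "prompt_tokens": 10,
        "completion_tokens": 5,
        "total_tokens": 15,
        "prompt_cache_hit_tokens": 8,  # provider-specific extra
    }

    usage = SimpleNamespace(
        prompt_tokens=provider_usage.get("prompt_tokens", 0),
        completion_tokens=provider_usage.get("completion_tokens", 0),
        total_tokens=provider_usage.get("total_tokens", 0),
    )

    special_keys = ["completion_tokens", "prompt_tokens", "total_tokens"]
    for k, v in provider_usage.items():
        if k not in special_keys:
            setattr(usage, k, v)  # anything non-standard is attached as-is

    print(usage.prompt_cache_hit_tokens)  # 8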