forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_maintain_Claude2_support
This commit is contained in:
commit
f1c39f65d7
24 changed files with 490 additions and 61 deletions
|
@ -2,11 +2,17 @@ import os, types
|
|||
import json
|
||||
from enum import Enum
|
||||
import requests
|
||||
import time
|
||||
import time, uuid
|
||||
from typing import Callable, Optional
|
||||
from litellm.utils import ModelResponse, Usage
|
||||
from litellm.utils import ModelResponse, Usage, map_finish_reason
|
||||
import litellm
|
||||
from .prompt_templates.factory import prompt_factory, custom_prompt
|
||||
from .prompt_templates.factory import (
|
||||
prompt_factory,
|
||||
custom_prompt,
|
||||
construct_tool_use_system_prompt,
|
||||
extract_between_tags,
|
||||
parse_xml_params,
|
||||
)
|
||||
import httpx
|
||||
|
||||
|
||||
|
@ -41,6 +47,7 @@ class AnthropicConfig:
|
|||
top_p: Optional[int] = None
|
||||
top_k: Optional[int] = None
|
||||
metadata: Optional[dict] = None
|
||||
system: Optional[str] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -50,6 +57,7 @@ class AnthropicConfig:
|
|||
top_p: Optional[int] = None,
|
||||
top_k: Optional[int] = None,
|
||||
metadata: Optional[dict] = None,
|
||||
system: Optional[str] = None,
|
||||
) -> None:
|
||||
locals_ = locals()
|
||||
for key, value in locals_.items():
|
||||
|
@ -108,6 +116,7 @@ def completion(
|
|||
headers={},
|
||||
):
|
||||
headers = validate_environment(api_key, headers)
|
||||
_is_function_call = False
|
||||
if model in custom_prompt_dict:
|
||||
# check if the model has a registered custom prompt
|
||||
model_prompt_details = custom_prompt_dict[model]
|
||||
|
@ -118,38 +127,19 @@ def completion(
|
|||
messages=messages,
|
||||
)
|
||||
else:
|
||||
prompt = prompt_factory(
|
||||
# Separate system prompt from rest of message
|
||||
system_prompt_idx: Optional[int] = None
|
||||
for idx, message in enumerate(messages):
|
||||
if message["role"] == "system":
|
||||
optional_params["system"] = message["content"]
|
||||
system_prompt_idx = idx
|
||||
break
|
||||
if system_prompt_idx is not None:
|
||||
messages.pop(system_prompt_idx)
|
||||
# Format rest of message according to anthropic guidelines
|
||||
messages = prompt_factory(
|
||||
model=model, messages=messages, custom_llm_provider="anthropic"
|
||||
)
|
||||
"""
|
||||
format messages for anthropic
|
||||
1. Anthropic supports roles like "user" and "assistant", (here litellm translates system-> assistant)
|
||||
2. The first message always needs to be of role "user"
|
||||
3. Each message must alternate between "user" and "assistant" (this is not addressed as now by litellm)
|
||||
4. final assistant content cannot end with trailing whitespace (anthropic raises an error otherwise)
|
||||
"""
|
||||
# 1. Anthropic only supports roles like "user" and "assistant"
|
||||
for idx, message in enumerate(messages):
|
||||
if message["role"] == "system":
|
||||
message["role"] = "assistant"
|
||||
|
||||
# if this is the final assistant message, remove trailing whitespace
|
||||
# TODO: only do this if it's the final assistant message
|
||||
if message["role"] == "assistant":
|
||||
message["content"] = message["content"].strip()
|
||||
|
||||
# 2. The first message always needs to be of role "user"
|
||||
if len(messages) > 0:
|
||||
if messages[0]["role"] != "user":
|
||||
# find the index of the first user message
|
||||
for i, message in enumerate(messages):
|
||||
if message["role"] == "user":
|
||||
break
|
||||
|
||||
# remove the user message at existing position and add it to the front
|
||||
messages.pop(i)
|
||||
# move the first user message to the front
|
||||
messages = [message] + messages
|
||||
|
||||
## Load Config
|
||||
config = litellm.AnthropicConfig.get_config()
|
||||
|
@ -159,6 +149,17 @@ def completion(
|
|||
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||
optional_params[k] = v
|
||||
|
||||
## Handle Tool Calling
|
||||
if "tools" in optional_params:
|
||||
_is_function_call = True
|
||||
tool_calling_system_prompt = construct_tool_use_system_prompt(
|
||||
tools=optional_params["tools"]
|
||||
)
|
||||
optional_params["system"] = (
|
||||
optional_params.get("system", "\n") + tool_calling_system_prompt
|
||||
) # add the anthropic tool calling prompt to the system prompt
|
||||
optional_params.pop("tools")
|
||||
|
||||
data = {
|
||||
"model": model,
|
||||
"messages": messages,
|
||||
|
@ -167,7 +168,7 @@ def completion(
|
|||
|
||||
## LOGGING
|
||||
logging_obj.pre_call(
|
||||
input=prompt,
|
||||
input=messages,
|
||||
api_key=api_key,
|
||||
additional_args={
|
||||
"complete_input_dict": data,
|
||||
|
@ -225,8 +226,33 @@ def completion(
|
|||
)
|
||||
else:
|
||||
text_content = completion_response["content"][0].get("text", None)
|
||||
model_response.choices[0].message.content = text_content # type: ignore
|
||||
model_response.choices[0].finish_reason = completion_response["stop_reason"]
|
||||
## TOOL CALLING - OUTPUT PARSE
|
||||
if text_content is not None and "invoke" in text_content:
|
||||
function_name = extract_between_tags("tool_name", text_content)[0]
|
||||
function_arguments_str = extract_between_tags("invoke", text_content)[
|
||||
0
|
||||
].strip()
|
||||
function_arguments_str = f"<invoke>{function_arguments_str}</invoke>"
|
||||
function_arguments = parse_xml_params(function_arguments_str)
|
||||
_message = litellm.Message(
|
||||
tool_calls=[
|
||||
{
|
||||
"id": f"call_{uuid.uuid4()}",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": function_name,
|
||||
"arguments": json.dumps(function_arguments),
|
||||
},
|
||||
}
|
||||
],
|
||||
content=None,
|
||||
)
|
||||
model_response.choices[0].message = _message # type: ignore
|
||||
else:
|
||||
model_response.choices[0].message.content = text_content # type: ignore
|
||||
model_response.choices[0].finish_reason = map_finish_reason(
|
||||
completion_response["stop_reason"]
|
||||
)
|
||||
|
||||
## CALCULATING USAGE
|
||||
prompt_tokens = completion_response["usage"]["input_tokens"]
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
from enum import Enum
|
||||
import requests, traceback
|
||||
import json
|
||||
import json, re, xml.etree.ElementTree as ET
|
||||
from jinja2 import Template, exceptions, Environment, meta
|
||||
from typing import Optional, Any
|
||||
import imghdr, base64
|
||||
|
||||
|
||||
def default_pt(messages):
|
||||
|
@ -390,7 +391,7 @@ def format_prompt_togetherai(messages, prompt_format, chat_template):
|
|||
return prompt
|
||||
|
||||
|
||||
###
|
||||
### ANTHROPIC ###
|
||||
|
||||
|
||||
def anthropic_pt(
|
||||
|
@ -424,6 +425,184 @@ def anthropic_pt(
|
|||
return prompt
|
||||
|
||||
|
||||
def construct_format_parameters_prompt(parameters: dict):
    """Render a mapping as the ``<parameter>`` section of the Claude tool prompt.

    Each key/value pair becomes ``<key>value</key>``. The pairs are emitted
    back to back (no separator) in the mapping's iteration order, wrapped
    between a ``<parameter>`` line and a ``</parameter>`` line. Values are
    inserted with plain f-string formatting and are not XML-escaped.

    Args:
        parameters: mapping of tag name -> value to render.

    Returns:
        The rendered block as a single string.
    """
    # Build the body in one pass with join instead of repeated `+=`.
    body = "".join(f"<{k}>{v}</{k}>" for k, v in parameters.items())
    return f"<parameter>\n{body}\n</parameter>"
|
||||
|
||||
|
||||
def construct_format_tool_for_claude_prompt(name, description, parameters):
    """Build the ``<tool_description>`` block describing one tool to Claude.

    The block contains, one per line: the tool name, the free-text
    description, and the parameters rendered by
    ``construct_format_parameters_prompt``. No trailing newline is added.
    """
    rendered_parameters = construct_format_parameters_prompt(parameters)
    prompt_lines = [
        "<tool_description>",
        f"<tool_name>{name}</tool_name>",
        "<description>",
        f"{description}",
        "</description>",
        "<parameters>",
        f"{rendered_parameters}",
        "</parameters>",
        "</tool_description>",
    ]
    return "\n".join(prompt_lines)
|
||||
|
||||
|
||||
def construct_tool_use_system_prompt(
    tools,
):  # from https://github.com/anthropics/anthropic-cookbook/blob/main/function_calling/function_calling.ipynb
    """Build the system prompt that teaches Claude how to call the given tools.

    Args:
        tools: iterable of OpenAI-style tool dicts. Each must have a
            ``"function"`` dict with a ``"name"``; ``"description"`` and
            ``"parameters"`` are optional and default to ``""`` and ``{}``.

    Returns:
        A prompt string: fixed instructions showing the
        ``<function_calls>/<invoke>`` call syntax, followed by a ``<tools>``
        section with one ``<tool_description>`` block per tool.

    Raises:
        KeyError: if a tool lacks ``"function"`` or the function lacks ``"name"``.
    """
    tool_descriptions = [
        construct_format_tool_for_claude_prompt(
            tool["function"]["name"],
            tool["function"].get("description", ""),
            tool["function"].get("parameters", {}),
        )
        for tool in tools
    ]
    tool_use_system_prompt = (
        "In this environment you have access to a set of tools you can use to answer the user's question.\n"
        "\n"
        "You may call them like this:\n"
        "<function_calls>\n"
        "<invoke>\n"
        "<tool_name>$TOOL_NAME</tool_name>\n"
        "<parameters>\n"
        "<$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>\n"
        "...\n"
        "</parameters>\n"
        "</invoke>\n"
        "</function_calls>\n"
        "\n"
        "Here are the tools available:\n"
        "<tools>\n" + "\n".join(tool_descriptions) + "\n</tools>"
    )
    return tool_use_system_prompt
|
||||
|
||||
|
||||
def convert_to_anthropic_image_obj(openai_image_url: str):
    """
    Translate an OpenAI-style base64 data URL into an Anthropic image source dict.

    Input:
        "image_url": "data:image/jpeg;base64,{base64_image}",

    Return:
        {
            "type": "base64",
            "media_type": "image/jpeg",
            "data": {base64_image},
        }

    Raises IndexError when the "data:image/" prefix or the ";base64,"
    separator is missing from the input.
    """
    # Text after the "data:image/" marker has the form "<format>;base64,<payload>"
    after_prefix = openai_image_url.split("data:image/")[1]
    pieces = after_prefix.split(";base64,")
    image_format, base64_data = pieces[0], pieces[1]

    return {
        "type": "base64",
        "media_type": f"image/{image_format}",
        "data": base64_data,
    }
|
||||
|
||||
|
||||
def _anthropic_content_pt(message):
    """Return `message` with OpenAI vision content parts translated for Anthropic.

    A message whose content is not a list is returned unchanged (same object).
    For list content, "image_url" parts become Anthropic "image" blocks and
    "text" parts become "text" blocks; parts of any other type are dropped.
    """
    content = message["content"]
    if not isinstance(content, list):
        return message

    new_content = []
    for part in content:
        part_type = part.get("type", "")
        if part_type == "image_url":
            new_content.append(
                {
                    "type": "image",
                    "source": convert_to_anthropic_image_obj(
                        part["image_url"]["url"]
                    ),
                }
            )
        elif part_type == "text":
            # Anthropic text blocks carry their payload under "text"
            new_content.append({"type": "text", "text": part["text"]})
    return {"role": message["role"], "content": new_content}


def anthropic_messages_pt(messages: list):
    """
    format messages for anthropic
    1. The first message always needs to be of role "user" - a blank "user" message is prepended if the conversation starts with "assistant"
    2. Each message must alternate between "user" and "assistant" - if 2 consecutive messages share a role, a blank message of the opposite role is inserted between them
    3. OpenAI vision content (list of "image_url" / "text" parts) is converted to anthropic "image" / "text" blocks
    4. final assistant content cannot end with trailing whitespace (anthropic raises an error otherwise) - the last assistant message with string content is stripped in place
    5. System messages are a separate param to the Messages API (used for tool calling) - they are not handled here

    Returns the new list of messages; an empty input returns an empty list.
    """
    new_messages: list = []
    # index (into new_messages) of the last assistant message taken from the input
    last_assistant_message_idx: Optional[int] = None

    for idx, message in enumerate(messages):
        role = message["role"]
        if idx == 0:
            if role == "assistant":
                new_messages.append({"role": "user", "content": ""})
        elif messages[idx - 1]["role"] == role:
            # two consecutive messages with the same role -> insert a blank filler
            filler_role = "assistant" if role == "user" else "user"
            new_messages.append({"role": filler_role, "content": ""})

        new_messages.append(_anthropic_content_pt(message))

        if role == "assistant":
            last_assistant_message_idx = len(new_messages) - 1

    if last_assistant_message_idx is not None:
        final_assistant = new_messages[last_assistant_message_idx]
        # only plain-string content can be stripped (content may be a list of blocks)
        if isinstance(final_assistant["content"], str):
            final_assistant["content"] = final_assistant[
                "content"
            ].strip()  # no trailing whitespace for final assistant message

    return new_messages
|
||||
|
||||
|
||||
def extract_between_tags(tag: str, string: str, strip: bool = False) -> list[str]:
    """Return the text found between every ``<tag>...</tag>`` pair in `string`.

    Matching is non-greedy and spans newlines. Empty elements
    (``<tag></tag>``) are not matched because at least one character is
    required between the tags.

    Args:
        tag: tag name, matched literally (regex metacharacters are escaped).
        string: text to search.
        strip: when True, surrounding whitespace is removed from each match.

    Returns:
        The matches in order of appearance; an empty list if there are none.
    """
    # Escape the tag so names such as "a.b" are not interpreted as regex.
    tag_pattern = re.escape(tag)
    ext_list = re.findall(
        f"<{tag_pattern}>(.+?)</{tag_pattern}>", string, re.DOTALL
    )
    if strip:
        ext_list = [e.strip() for e in ext_list]
    return ext_list
|
||||
|
||||
|
||||
def parse_xml_params(xml_content):
    """Parse an XML snippet and return its ``<parameters>`` children as a dict.

    Every direct child element of any ``<parameters>`` element below the
    root is collected as ``{tag: text}``. A later element with the same tag
    overwrites an earlier one, and an element without text maps to None.
    Malformed XML propagates the parser's exception.
    """
    document_root = ET.fromstring(xml_content)
    return {
        element.tag: element.text
        for element in document_root.findall(".//parameters/*")
    }
|
||||
|
||||
|
||||
###
|
||||
|
||||
|
||||
def amazon_titan_pt(
|
||||
messages: list,
|
||||
): # format - https://github.com/BerriAI/litellm/issues/1896
|
||||
|
@ -650,10 +829,9 @@ def prompt_factory(
|
|||
if custom_llm_provider == "ollama":
|
||||
return ollama_pt(model=model, messages=messages)
|
||||
elif custom_llm_provider == "anthropic":
|
||||
if any(_ in model for _ in ["claude-2.1", "claude-v2:1"]):
|
||||
return claude_2_1_pt(messages=messages)
|
||||
else:
|
||||
if model == "claude-instant-1" or model == "claude-2":
|
||||
return anthropic_pt(messages=messages)
|
||||
return anthropic_messages_pt(messages=messages)
|
||||
elif custom_llm_provider == "together_ai":
|
||||
prompt_format, chat_template = get_model_info(token=api_key, model=model)
|
||||
return format_prompt_togetherai(
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-59d9232c3e7a8be6.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-59d9232c3e7a8be6.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/32e93a3d13512de5.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[56239,[\"730\",\"static/chunks/730-1411b729a1c79695.js\",\"931\",\"static/chunks/app/page-37bd7c3d0bb898a3.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/32e93a3d13512de5.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"p1zjZBLDqxGf-NaFvZkeF\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"
children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-59d9232c3e7a8be6.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-59d9232c3e7a8be6.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/32e93a3d13512de5.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[57492,[\"730\",\"static/chunks/730-1411b729a1c79695.js\",\"931\",\"static/chunks/app/page-2ed0bc91ffef505b.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/32e93a3d13512de5.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"ZF-EluyKCEJoZptE3dOXT\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"
children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[56239,["730","static/chunks/730-1411b729a1c79695.js","931","static/chunks/app/page-37bd7c3d0bb898a3.js"],""]
|
||||
3:I[57492,["730","static/chunks/730-1411b729a1c79695.js","931","static/chunks/app/page-2ed0bc91ffef505b.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["p1zjZBLDqxGf-NaFvZkeF",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/32e93a3d13512de5.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["ZF-EluyKCEJoZptE3dOXT",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/32e93a3d13512de5.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -5811,6 +5811,58 @@ async def model_info_v2(
|
|||
return {"data": all_models}
|
||||
|
||||
|
||||
@router.get(
    "/model/metrics",
    description="View number of requests & avg latency per model on config.yaml",
    tags=["model management"],
    dependencies=[Depends(user_api_key_auth)],
)
async def model_metrics(
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """Return request count and average latency per model from the spend logs.

    Runs a raw SQL aggregation over "LiteLLM_SpendLogs" for rows whose
    "startTime" falls within the last 10000 hours, grouped by model (or
    "model-api_base" when api_base is non-empty), ordered by request count
    and limited to the top 50 groups.

    Args:
        user_api_key_dict: result of the auth dependency; not read in the body.

    Returns:
        A list of dicts with keys "model", "num_requests" and
        "avg_latency_seconds"; empty when the query yields nothing.

    Raises:
        ProxyException: when the Prisma client has not been initialized.
    """
    global prisma_client
    if prisma_client is None:
        raise ProxyException(
            message="Prisma Client is not initialized",
            type="internal_error",
            param="None",
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )

    sql_query = """
        SELECT
            CASE WHEN api_base = '' THEN model ELSE CONCAT(model, '-', api_base) END AS combined_model_api_base,
            COUNT(*) AS num_requests,
            AVG(EXTRACT(epoch FROM ("endTime" - "startTime"))) AS avg_latency_seconds
        FROM
            "LiteLLM_SpendLogs"
        WHERE
            "startTime" >= NOW() - INTERVAL '10000 hours'
        GROUP BY
            CASE WHEN api_base = '' THEN model ELSE CONCAT(model, '-', api_base) END
        ORDER BY
            num_requests DESC
        LIMIT 50;
    """

    db_response = await prisma_client.db.query_raw(query=sql_query)
    response: List[dict] = []
    # Guard on the query result (not on the freshly created `response` list,
    # which is never None) so a None result yields an empty list.
    if db_response is not None:
        # loop through all models
        for model_data in db_response:
            response.append(
                {
                    "model": model_data.get("combined_model_api_base", ""),
                    "num_requests": model_data.get("num_requests", 0),
                    "avg_latency_seconds": model_data.get("avg_latency_seconds", 0),
                }
            )
    return response
|
||||
|
||||
|
||||
@router.get(
|
||||
"/model/info",
|
||||
description="Provides more info about each model in /models, including config.yaml descriptions (except api key and api base)",
|
||||
|
|
|
@ -41,10 +41,11 @@ def test_function_call_non_openai_model():
|
|||
pass
|
||||
|
||||
|
||||
test_function_call_non_openai_model()
|
||||
# test_function_call_non_openai_model()
|
||||
|
||||
|
||||
## case 2: add_function_to_prompt set
|
||||
@pytest.mark.skip(reason="Anthropic now supports tool calling")
|
||||
def test_function_call_non_openai_model_litellm_mod_set():
|
||||
litellm.add_function_to_prompt = True
|
||||
litellm.set_verbose = True
|
||||
|
|
|
@ -351,7 +351,7 @@ def test_gemini_pro_vision_base64():
|
|||
load_vertex_ai_credentials()
|
||||
litellm.set_verbose = True
|
||||
litellm.num_retries = 3
|
||||
image_path = "cached_logo.jpg"
|
||||
image_path = "../proxy/cached_logo.jpg"
|
||||
# Getting the base64 string
|
||||
base64_image = encode_image(image_path)
|
||||
resp = litellm.completion(
|
||||
|
|
|
@ -100,6 +100,47 @@ def test_completion_claude_3():
|
|||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
def test_completion_claude_3_function_call():
    """Claude 3 Opus, given OpenAI-style `tools`, should answer with a tool call.

    Passes when the first tool call's function name and arguments are both
    strings; any exception (including a failed assertion) fails the test.
    """
    litellm.set_verbose = True
    weather_function = {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
                "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
            },
            "required": ["location"],
        },
    }
    tools = [{"type": "function", "function": weather_function}]
    messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
    try:
        # test without max tokens
        response = completion(
            model="anthropic/claude-3-opus-20240229",
            messages=messages,
            tools=tools,
            tool_choice="auto",
        )
        # Add any assertions, here to check response args
        print(response)
        first_call_name = response.choices[0].message.tool_calls[0].function.name
        assert isinstance(first_call_name, str)
        first_call_arguments = (
            response.choices[0].message.tool_calls[0].function.arguments
        )
        assert isinstance(first_call_arguments, str)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
def test_completion_claude_3_stream():
|
||||
litellm.set_verbose = False
|
||||
messages = [{"role": "user", "content": "Hello, world"}]
|
||||
|
@ -119,6 +160,51 @@ def test_completion_claude_3_stream():
|
|||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 string.

    The file is read in binary mode and the base64 output is decoded to
    ``str`` so it can be embedded directly in a ``data:`` URL.
    """
    import base64

    # Read first so the file handle is released before encoding.
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes).decode("utf-8")
|
||||
|
||||
|
||||
@pytest.mark.skip(
    reason="we already test claude-3, this is just another way to pass images"
)
def test_completion_claude_3_base64():
    """Send a base64-encoded JPEG to Claude 3 as an ``image_url`` data URL.

    The image is read from ``../proxy/cached_logo.jpg`` and embedded in the
    user message next to a text part. The test passes when the call returns
    a response that reports a positive prompt token count. An error whose
    text contains the "500 Internal error encountered." marker is tolerated;
    any other exception fails the test.
    """
    try:
        litellm.set_verbose = True
        litellm.num_retries = 3
        image_path = "../proxy/cached_logo.jpg"
        # Getting the base64 string
        base64_image = encode_image(image_path)
        resp = litellm.completion(
            model="anthropic/claude-3-opus-20240229",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Whats in this image?"},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": "data:image/jpeg;base64," + base64_image
                            },
                        },
                    ],
                }
            ],
        )
        print(f"\nResponse: {resp}")

        # A successful call must not raise: the previous unconditional
        # `raise Exception("it worked!")` was caught below and reported as a
        # failure. Check the usage data that was read instead.
        prompt_tokens = resp.usage.prompt_tokens
        assert prompt_tokens > 0, f"unexpected prompt_tokens: {prompt_tokens!r}"
    except Exception as e:
        if "500 Internal error encountered.'" in str(e):
            pass
        else:
            pytest.fail(f"An exception occurred - {str(e)}")
|
||||
|
||||
|
||||
def test_completion_mistral_api():
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
|
|
|
@ -200,6 +200,10 @@ def map_finish_reason(
|
|||
return "content_filter"
|
||||
elif finish_reason == "STOP": # vertex ai
|
||||
return "stop"
|
||||
elif finish_reason == "end_turn" or finish_reason == "stop_sequence": # anthropic
|
||||
return "stop"
|
||||
elif finish_reason == "max_tokens": # anthropic
|
||||
return "length"
|
||||
return finish_reason
|
||||
|
||||
|
||||
|
@ -4106,6 +4110,7 @@ def get_optional_params(
|
|||
and custom_llm_provider != "anyscale"
|
||||
and custom_llm_provider != "together_ai"
|
||||
and custom_llm_provider != "mistral"
|
||||
and custom_llm_provider != "anthropic"
|
||||
):
|
||||
if custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat":
|
||||
# ollama actually supports json output
|
||||
|
@ -4186,7 +4191,15 @@ def get_optional_params(
|
|||
## raise exception if provider doesn't support passed in param
|
||||
if custom_llm_provider == "anthropic":
|
||||
## check if unsupported param passed in
|
||||
supported_params = ["stream", "stop", "temperature", "top_p", "max_tokens"]
|
||||
supported_params = [
|
||||
"stream",
|
||||
"stop",
|
||||
"temperature",
|
||||
"top_p",
|
||||
"max_tokens",
|
||||
"tools",
|
||||
"tool_choice",
|
||||
]
|
||||
_check_valid_arg(supported_params=supported_params)
|
||||
# handle anthropic params
|
||||
if stream:
|
||||
|
@ -4205,6 +4218,9 @@ def get_optional_params(
|
|||
optional_params["max_tokens_to_sample"] = max_tokens
|
||||
else:
|
||||
optional_params["max_tokens"] = max_tokens
|
||||
optional_params["max_tokens"] = max_tokens
|
||||
if tools is not None:
|
||||
optional_params["tools"] = tools
|
||||
elif custom_llm_provider == "cohere":
|
||||
## check if unsupported param passed in
|
||||
supported_params = [
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "litellm"
|
||||
version = "1.28.14"
|
||||
version = "1.29.0"
|
||||
description = "Library to easily interface with LLM API providers"
|
||||
authors = ["BerriAI"]
|
||||
license = "MIT"
|
||||
|
@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"]
|
|||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.commitizen]
|
||||
version = "1.28.14"
|
||||
version = "1.29.0"
|
||||
version_files = [
|
||||
"pyproject.toml:^version"
|
||||
]
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-59d9232c3e7a8be6.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-59d9232c3e7a8be6.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/32e93a3d13512de5.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[56239,[\"730\",\"static/chunks/730-1411b729a1c79695.js\",\"931\",\"static/chunks/app/page-37bd7c3d0bb898a3.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/32e93a3d13512de5.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"p1zjZBLDqxGf-NaFvZkeF\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"
children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-59d9232c3e7a8be6.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-59d9232c3e7a8be6.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/32e93a3d13512de5.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[57492,[\"730\",\"static/chunks/730-1411b729a1c79695.js\",\"931\",\"static/chunks/app/page-2ed0bc91ffef505b.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/32e93a3d13512de5.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"ZF-EluyKCEJoZptE3dOXT\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"
children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[56239,["730","static/chunks/730-1411b729a1c79695.js","931","static/chunks/app/page-37bd7c3d0bb898a3.js"],""]
|
||||
3:I[57492,["730","static/chunks/730-1411b729a1c79695.js","931","static/chunks/app/page-2ed0bc91ffef505b.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["p1zjZBLDqxGf-NaFvZkeF",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/32e93a3d13512de5.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["ZF-EluyKCEJoZptE3dOXT",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/32e93a3d13512de5.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -11,7 +11,8 @@ import {
|
|||
Metric,
|
||||
Grid,
|
||||
} from "@tremor/react";
|
||||
import { modelInfoCall, userGetRequesedtModelsCall } from "./networking";
|
||||
import { modelInfoCall, userGetRequesedtModelsCall, modelMetricsCall } from "./networking";
|
||||
import { BarChart } from "@tremor/react";
|
||||
import { Badge, BadgeDelta, Button } from "@tremor/react";
|
||||
import RequestAccess from "./request_model_access";
|
||||
import { Typography } from "antd";
|
||||
|
@ -30,6 +31,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
|||
userID,
|
||||
}) => {
|
||||
const [modelData, setModelData] = useState<any>({ data: [] });
|
||||
const [modelMetrics, setModelMetrics] = useState<any[]>([]);
|
||||
const [pendingRequests, setPendingRequests] = useState<any[]>([]);
|
||||
|
||||
useEffect(() => {
|
||||
|
@ -47,6 +49,15 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
|||
console.log("Model data response:", modelDataResponse.data);
|
||||
setModelData(modelDataResponse);
|
||||
|
||||
const modelMetricsResponse = await modelMetricsCall(
|
||||
accessToken,
|
||||
userID,
|
||||
userRole
|
||||
);
|
||||
|
||||
console.log("Model metrics response:", modelMetricsResponse);
|
||||
setModelMetrics(modelMetricsResponse);
|
||||
|
||||
// if userRole is Admin, show the pending requests
|
||||
if (userRole === "Admin" && accessToken) {
|
||||
const user_requests = await userGetRequesedtModelsCall(accessToken);
|
||||
|
@ -75,8 +86,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
|||
// loop through model data and edit each row
|
||||
for (let i = 0; i < modelData.data.length; i++) {
|
||||
let curr_model = modelData.data[i];
|
||||
let litellm_model_name = curr_model?.litellm_params?.model;
|
||||
|
||||
let litellm_model_name = curr_model?.litellm_params?.mode
|
||||
let model_info = curr_model?.model_info;
|
||||
|
||||
let defaultProvider = "openai";
|
||||
|
@ -109,6 +119,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
|||
modelData.data[i].input_cost = input_cost;
|
||||
modelData.data[i].output_cost = output_cost;
|
||||
modelData.data[i].max_tokens = max_tokens;
|
||||
modelData.data[i].api_base = curr_model?.litellm_params?.api_base;
|
||||
|
||||
all_models_on_proxy.push(curr_model.model_name);
|
||||
|
||||
|
@ -141,6 +152,14 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
|||
<TableCell>
|
||||
<Title>Provider</Title>
|
||||
</TableCell>
|
||||
{
|
||||
userRole === "Admin" && (
|
||||
<TableCell>
|
||||
<Title>API Base</Title>
|
||||
</TableCell>
|
||||
)
|
||||
}
|
||||
|
||||
<TableCell>
|
||||
<Title>Access</Title>
|
||||
</TableCell>
|
||||
|
@ -162,6 +181,11 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
|||
<Title>{model.model_name}</Title>
|
||||
</TableCell>
|
||||
<TableCell>{model.provider}</TableCell>
|
||||
{
|
||||
userRole === "Admin" && (
|
||||
<TableCell>{model.api_base}</TableCell>
|
||||
)
|
||||
}
|
||||
|
||||
<TableCell>
|
||||
{model.user_access ? (
|
||||
|
@ -183,7 +207,18 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
|||
</TableBody>
|
||||
</Table>
|
||||
</Card>
|
||||
{userRole === "Admin" &&
|
||||
<Card>
|
||||
<Title>Model Statistics (Number Requests, Latency)</Title>
|
||||
<BarChart
|
||||
data={modelMetrics}
|
||||
index="model"
|
||||
categories={["num_requests", "avg_latency_seconds"]}
|
||||
colors={["blue", "red"]}
|
||||
yAxisWidth={100}
|
||||
tickGap={5}
|
||||
/>
|
||||
</Card>
|
||||
{/* {userRole === "Admin" &&
|
||||
pendingRequests &&
|
||||
pendingRequests.length > 0 ? (
|
||||
<Card>
|
||||
|
@ -229,7 +264,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
|||
</TableBody>
|
||||
</Table>
|
||||
</Card>
|
||||
) : null}
|
||||
) : null} */}
|
||||
</Grid>
|
||||
</div>
|
||||
);
|
||||
|
|
|
@ -242,6 +242,41 @@ export const modelInfoCall = async (
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
export const modelMetricsCall = async (
  accessToken: String,
  userID: String,
  userRole: String
) => {
  /**
   * Fetch model metrics from the proxy's `/model/metrics` endpoint.
   *
   * `userID` and `userRole` are accepted for signature parity with the other
   * model calls but are not sent with this request.
   *
   * Returns the parsed JSON body. On a non-OK response the body text is shown
   * via `message.error` and an Error is thrown; every failure is logged and
   * re-thrown to the caller.
   */
  try {
    // Use the configured proxy base URL when present, otherwise a relative path.
    let url = proxyBaseUrl ? `${proxyBaseUrl}/model/metrics` : `/model/metrics`;
    const response = await fetch(url, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
    });

    if (!response.ok) {
      const errorData = await response.text();
      message.error(errorData);
      throw new Error("Network response was not ok");
    }

    const data = await response.json();
    return data;
  } catch (error) {
    // Log with a message that names this call, then let the caller handle it.
    console.error("Failed to fetch model metrics:", error);
    throw error;
  }
};
|
||||
|
||||
export const modelAvailableCall = async (
|
||||
accessToken: String,
|
||||
userID: String,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue