forked from phoenix/litellm-mirror

Merge branch 'BerriAI:main' into feature/watsonx-integration

commit a77537ddd4

45 changed files with 1027 additions and 281 deletions

@@ -279,7 +279,7 @@ router_settings:
```

</TabItem>

<TabItem value="simple-shuffle" label="(Default) Weighted Pick">
<TabItem value="simple-shuffle" label="(Default) Weighted Pick (Async)">

**Default** Picks a deployment based on the provided **Requests per minute (rpm) or Tokens per minute (tpm)**

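For context on the weighted pick described above, here is a minimal illustrative sketch (deployment names and `rpm` values are hypothetical) of how the `simple-shuffle` strategy weights deployments by their configured `rpm`:

```python
from litellm import Router

# Two deployments of the same model group; rpm values are illustrative.
model_list = [
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "azure/chatgpt-v-2", "rpm": 900},
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "gpt-3.5-turbo", "rpm": 100},
    },
]

router = Router(model_list=model_list, routing_strategy="simple-shuffle")

# With rpm set, simple-shuffle does a weighted random pick:
# the first deployment is chosen ~90% of the time (900 / (900 + 100)).
response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
)
```

With `rpm` (or `tpm`) set on each deployment, the pick probability is proportional to that value; without it, the pick is a plain random shuffle.
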
@@ -19,7 +19,7 @@ class PrometheusLogger:
**kwargs,
):
try:
verbose_logger.debug(f"in init prometheus metrics")
print(f"in init prometheus metrics")
from prometheus_client import Counter

self.litellm_llm_api_failed_requests_metric = Counter(

@@ -44,9 +44,18 @@ class PrometheusServicesLogger:
) # store the prometheus histogram/counter we need to call for each field in payload

for service in self.services:
histogram = self.create_histogram(service)
counter = self.create_counter(service)
self.payload_to_prometheus_map[service] = [histogram, counter]
histogram = self.create_histogram(service, type_of_request="latency")
counter_failed_request = self.create_counter(
service, type_of_request="failed_requests"
)
counter_total_requests = self.create_counter(
service, type_of_request="total_requests"
)
self.payload_to_prometheus_map[service] = [
histogram,
counter_failed_request,
counter_total_requests,
]

self.prometheus_to_amount_map: dict = (
{}

@@ -74,26 +83,26 @@ class PrometheusServicesLogger:
return metric
return None

def create_histogram(self, label: str):
metric_name = "litellm_{}_latency".format(label)
def create_histogram(self, service: str, type_of_request: str):
metric_name = "litellm_{}_{}".format(service, type_of_request)
is_registered = self.is_metric_registered(metric_name)
if is_registered:
return self.get_metric(metric_name)
return self.Histogram(
metric_name,
"Latency for {} service".format(label),
labelnames=[label],
"Latency for {} service".format(service),
labelnames=[service],
)

def create_counter(self, label: str):
metric_name = "litellm_{}_failed_requests".format(label)
def create_counter(self, service: str, type_of_request: str):
metric_name = "litellm_{}_{}".format(service, type_of_request)
is_registered = self.is_metric_registered(metric_name)
if is_registered:
return self.get_metric(metric_name)
return self.Counter(
metric_name,
"Total failed requests for {} service".format(label),
labelnames=[label],
"Total {} for {} service".format(type_of_request, service),
labelnames=[service],
)

def observe_histogram(

@@ -120,6 +129,8 @@ class PrometheusServicesLogger:
if self.mock_testing:
self.mock_testing_success_calls += 1

print(f"payload call type: {payload.call_type}")

if payload.service.value in self.payload_to_prometheus_map:
prom_objects = self.payload_to_prometheus_map[payload.service.value]
for obj in prom_objects:

@@ -129,11 +140,19 @@ class PrometheusServicesLogger:
labels=payload.service.value,
amount=payload.duration,
)
elif isinstance(obj, self.Counter) and "total_requests" in obj._name:
self.increment_counter(
counter=obj,
labels=payload.service.value,
amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS
)

def service_failure_hook(self, payload: ServiceLoggerPayload):
if self.mock_testing:
self.mock_testing_failure_calls += 1

print(f"payload call type: {payload.call_type}")

if payload.service.value in self.payload_to_prometheus_map:
prom_objects = self.payload_to_prometheus_map[payload.service.value]
for obj in prom_objects:

@@ -141,7 +160,7 @@ class PrometheusServicesLogger:
self.increment_counter(
counter=obj,
labels=payload.service.value,
amount=1, # LOG ERROR COUNT TO PROMETHEUS
amount=1, # LOG ERROR COUNT / TOTAL REQUESTS TO PROMETHEUS
)

async def async_service_success_hook(self, payload: ServiceLoggerPayload):

@@ -151,6 +170,8 @@ class PrometheusServicesLogger:
if self.mock_testing:
self.mock_testing_success_calls += 1

print(f"payload call type: {payload.call_type}")

if payload.service.value in self.payload_to_prometheus_map:
prom_objects = self.payload_to_prometheus_map[payload.service.value]
for obj in prom_objects:

@@ -160,12 +181,20 @@ class PrometheusServicesLogger:
labels=payload.service.value,
amount=payload.duration,
)
elif isinstance(obj, self.Counter) and "total_requests" in obj._name:
self.increment_counter(
counter=obj,
labels=payload.service.value,
amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS
)

async def async_service_failure_hook(self, payload: ServiceLoggerPayload):
print(f"received error payload: {payload.error}")
if self.mock_testing:
self.mock_testing_failure_calls += 1

print(f"payload call type: {payload.call_type}")

if payload.service.value in self.payload_to_prometheus_map:
prom_objects = self.payload_to_prometheus_map[payload.service.value]
for obj in prom_objects:

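For reference, the new `litellm_{service}_{type_of_request}` naming means each tracked service now gets three metrics instead of one. A small illustrative sketch with `prometheus_client` (the `redis` service name is just an example):

```python
from prometheus_client import Counter, Histogram

service = "redis"  # illustrative service name

# One latency histogram plus separate failed/total request counters per service,
# mirroring the litellm_{service}_{type_of_request} naming used above.
latency = Histogram(
    "litellm_{}_{}".format(service, "latency"),
    "Latency for {} service".format(service),
    labelnames=[service],
)
failed_requests = Counter(
    "litellm_{}_{}".format(service, "failed_requests"),
    "Total failed_requests for {} service".format(service),
    labelnames=[service],
)
total_requests = Counter(
    "litellm_{}_{}".format(service, "total_requests"),
    "Total total_requests for {} service".format(service),
    labelnames=[service],
)
```
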
@@ -507,10 +507,11 @@ def construct_tool_use_system_prompt(
): # from https://github.com/anthropics/anthropic-cookbook/blob/main/function_calling/function_calling.ipynb
tool_str_list = []
for tool in tools:
tool_function = get_attribute_or_key(tool, "function")
tool_str = construct_format_tool_for_claude_prompt(
tool["function"]["name"],
tool["function"].get("description", ""),
tool["function"].get("parameters", {}),
get_attribute_or_key(tool_function, "name"),
get_attribute_or_key(tool_function, "description", ""),
get_attribute_or_key(tool_function, "parameters", {}),
)
tool_str_list.append(tool_str)
tool_use_system_prompt = (

@@ -634,7 +635,8 @@ def convert_to_anthropic_tool_result_xml(message: dict) -> str:
</function_results>
"""
name = message.get("name")
content = message.get("content")
content = message.get("content", "")
content = content.replace("<", "&lt;").replace(">", "&gt;").replace("&", "&amp;")

# We can't determine from openai message format whether it's a successful or
# error call result so default to the successful result template

@@ -655,13 +657,15 @@ def convert_to_anthropic_tool_result_xml(message: dict) -> str:
def convert_to_anthropic_tool_invoke_xml(tool_calls: list) -> str:
invokes = ""
for tool in tool_calls:
if tool["type"] != "function":
if get_attribute_or_key(tool, "type") != "function":
continue

tool_name = tool["function"]["name"]
tool_function = get_attribute_or_key(tool,"function")
tool_name = get_attribute_or_key(tool_function, "name")
tool_arguments = get_attribute_or_key(tool_function, "arguments")
parameters = "".join(
f"<{param}>{val}</{param}>\n"
for param, val in json.loads(tool["function"]["arguments"]).items()
for param, val in json.loads(tool_arguments).items()
)
invokes += (
"<invoke>\n"

@@ -715,7 +719,7 @@ def anthropic_messages_pt_xml(messages: list):
{
"type": "text",
"text": (
convert_to_anthropic_tool_result(messages[msg_i])
convert_to_anthropic_tool_result_xml(messages[msg_i])
if messages[msg_i]["role"] == "tool"
else messages[msg_i]["content"]
),

@@ -736,7 +740,7 @@ def anthropic_messages_pt_xml(messages: list):
if messages[msg_i].get(
"tool_calls", []
): # support assistant tool invoke convertion
assistant_text += convert_to_anthropic_tool_invoke( # type: ignore
assistant_text += convert_to_anthropic_tool_invoke_xml( # type: ignore
messages[msg_i]["tool_calls"]
)

@@ -848,12 +852,12 @@ def convert_to_anthropic_tool_invoke(tool_calls: list) -> list:
anthropic_tool_invoke = [
{
"type": "tool_use",
"id": tool["id"],
"name": tool["function"]["name"],
"input": json.loads(tool["function"]["arguments"]),
"id": get_attribute_or_key(tool, "id"),
"name": get_attribute_or_key(get_attribute_or_key(tool, "function"), "name"),
"input": json.loads(get_attribute_or_key(get_attribute_or_key(tool, "function"), "arguments")),
}
for tool in tool_calls
if tool["type"] == "function"
if get_attribute_or_key(tool, "type") == "function"
]

return anthropic_tool_invoke

@@ -1074,7 +1078,8 @@ def cohere_message_pt(messages: list):
tool_result = convert_openai_message_to_cohere_tool_result(message)
tool_results.append(tool_result)
else:
prompt += message["content"]
prompt += message["content"] + "\n\n"
prompt = prompt.rstrip()
return prompt, tool_results

@@ -1414,3 +1419,8 @@ def prompt_factory(
return default_pt(
messages=messages
) # default that covers Bloom, T-5, any non-chat tuned model (e.g. base Llama2)

def get_attribute_or_key(tool_or_function, attribute, default=None):
if hasattr(tool_or_function, attribute):
return getattr(tool_or_function, attribute)
return tool_or_function.get(attribute, default)

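The new `get_attribute_or_key` helper above lets the prompt factory accept both plain dict tool definitions and object-style ones (e.g. pydantic models returned by an SDK). A minimal illustrative check, where the `ToolFunction` class is hypothetical:

```python
class ToolFunction:
    """Hypothetical object-style tool function, standing in for a pydantic model."""
    def __init__(self, name):
        self.name = name

def get_attribute_or_key(tool_or_function, attribute, default=None):
    if hasattr(tool_or_function, attribute):
        return getattr(tool_or_function, attribute)
    return tool_or_function.get(attribute, default)

# Works for both shapes:
assert get_attribute_or_key({"name": "get_weather"}, "name") == "get_weather"
assert get_attribute_or_key(ToolFunction("get_weather"), "name") == "get_weather"
assert get_attribute_or_key({}, "description", "") == ""
```
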
@@ -236,17 +236,19 @@ def completion(
if client is None:
if vertex_credentials is not None and isinstance(vertex_credentials, str):
import google.oauth2.service_account

json_obj = json.loads(vertex_credentials)

creds = (
google.oauth2.service_account.Credentials.from_service_account_info(
json_obj,
json.loads(vertex_credentials),
scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
)
### CHECK IF ACCESS
access_token = refresh_auth(credentials=creds)
else:
import google.auth
creds, _ = google.auth.default()
### CHECK IF ACCESS
access_token = refresh_auth(credentials=creds)

vertex_ai_client = AnthropicVertex(
project_id=vertex_project,

@@ -610,6 +610,7 @@ def completion(
"client",
"rpm",
"tpm",
"max_parallel_requests",
"input_cost_per_token",
"output_cost_per_token",
"input_cost_per_second",

@@ -2598,6 +2599,7 @@ def embedding(
client = kwargs.pop("client", None)
rpm = kwargs.pop("rpm", None)
tpm = kwargs.pop("tpm", None)
max_parallel_requests = kwargs.pop("max_parallel_requests", None)
model_info = kwargs.get("model_info", None)
metadata = kwargs.get("metadata", None)
encoding_format = kwargs.get("encoding_format", None)

@@ -2655,6 +2657,7 @@ def embedding(
"client",
"rpm",
"tpm",
"max_parallel_requests",
"input_cost_per_token",
"output_cost_per_token",
"input_cost_per_second",

@@ -3514,6 +3517,7 @@ def image_generation(
"client",
"rpm",
"tpm",
"max_parallel_requests",
"input_cost_per_token",
"output_cost_per_token",
"hf_model_name",

File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
[minified single-line Next.js build artifact (LiteLLM Dashboard HTML) omitted: the change swaps buildId "Oe7aA-U7OV9Y13gspREJQ" for "Vjlnu8AomhCFg4fkGtcUs" and page chunk "page-93ac11fb17dce9d6.js" for "page-6ba29bc4256320f4.js" (module ref 16586 -> 65249); the markup is otherwise unchanged]

@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[16586,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-93ac11fb17dce9d6.js"],""]
3:I[65249,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-6ba29bc4256320f4.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
[two single-line RSC payloads omitted: the buildId changes from "Oe7aA-U7OV9Y13gspREJQ" to "Vjlnu8AomhCFg4fkGtcUs"; the rest of the payload is unchanged]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

@@ -4,14 +4,12 @@ model_list:
model: openai/my-fake-model
api_key: my-fake-key
api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
# api_base: http://0.0.0.0:8080
stream_timeout: 0.001
- model_name: fake-openai-endpoint
litellm_params:
model: openai/my-fake-model-2
api_key: my-fake-key
api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
# api_base: http://0.0.0.0:8080
stream_timeout: 0.001
- litellm_params:
model: azure/chatgpt-v-2

@@ -30,13 +28,6 @@ model_list:
# api_key: my-fake-key
# api_base: https://exampleopenaiendpoint-production.up.railway.app/

# litellm_settings:
# success_callback: ["prometheus"]
# failure_callback: ["prometheus"]
# service_callback: ["prometheus_system"]
# upperbound_key_generate_params:
# max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET

router_settings:
routing_strategy: usage-based-routing-v2
# redis_url: "os.environ/REDIS_URL"

@@ -48,6 +39,10 @@ router_settings:
litellm_settings:
num_retries: 3 # retry call 3 times on each model_name
allowed_fails: 3 # cooldown model if it fails > 1 call in a minute.
success_callback: ["prometheus"]
failure_callback: ["prometheus"]
service_callback: ["prometheus_system"]

general_settings:
alerting: ["slack"]

@@ -87,6 +87,14 @@ class LiteLLMRoutes(enum.Enum):
"/v2/key/info",
]

sso_only_routes: List = [
"/key/generate",
"/key/update",
"/key/delete",
"/global/spend/logs",
"/global/predict/spend/logs",
]

management_routes: List = [ # key
"/key/generate",
"/key/update",

@@ -1053,6 +1053,11 @@ async def user_api_key_auth(
status_code=status.HTTP_403_FORBIDDEN,
detail="key not allowed to access this team's info",
)
elif (
_has_user_setup_sso()
and route in LiteLLMRoutes.sso_only_routes.value
):
pass
else:
raise Exception(
f"Only master key can be used to generate, delete, update info for new keys/users/teams. Route={route}"

@@ -1102,6 +1107,13 @@ async def user_api_key_auth(
return UserAPIKeyAuth(
api_key=api_key, user_role="proxy_admin", **valid_token_dict
)
elif (
_has_user_setup_sso()
and route in LiteLLMRoutes.sso_only_routes.value
):
return UserAPIKeyAuth(
api_key=api_key, user_role="app_owner", **valid_token_dict
)
else:
raise Exception(
f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"

@@ -5721,6 +5733,20 @@ async def new_user(data: NewUserRequest):
"user" # only create a user, don't create key if 'auto_create_key' set to False
)
response = await generate_key_helper_fn(**data_json)

# Admin UI Logic
# if team_id passed add this user to the team
if data_json.get("team_id", None) is not None:
await team_member_add(
data=TeamMemberAddRequest(
team_id=data_json.get("team_id", None),
member=Member(
user_id=data_json.get("user_id", None),
role="user",
user_email=data_json.get("user_email", None),
),
)
)
return NewUserResponse(
key=response.get("token", ""),
expires=response.get("expires", None),

@@ -6526,13 +6552,20 @@ async def team_member_add(
existing_team_row = await prisma_client.get_data( # type: ignore
team_id=data.team_id, table_name="team", query_type="find_unique"
)
if existing_team_row is None:
raise HTTPException(
status_code=404,
detail={
"error": f"Team not found for team_id={getattr(data, 'team_id', None)}"
},
)

new_member = data.member

existing_team_row.members_with_roles.append(new_member)

complete_team_data = LiteLLM_TeamTable(
**existing_team_row.model_dump(),
**_get_pydantic_json_dict(existing_team_row),
)

team_row = await prisma_client.update_data(

@@ -8120,7 +8153,6 @@ async def auth_callback(request: Request):
}
user_role = getattr(user_info, "user_role", None)

else:
## check if user-email in db ##
user_info = await prisma_client.db.litellm_usertable.find_first(
where={"user_email": user_email}

@@ -8142,9 +8174,7 @@ async def auth_callback(request: Request):
litellm.default_user_params, dict
):
user_defined_values = {
"models": litellm.default_user_params.get(
"models", user_id_models
),
"models": litellm.default_user_params.get("models", user_id_models),
"user_id": litellm.default_user_params.get("user_id", user_id),
"user_email": litellm.default_user_params.get(
"user_email", user_email

@@ -238,7 +238,10 @@ class ProxyLogging:
litellm_params = kwargs.get("litellm_params", {})
model = kwargs.get("model", "")
api_base = litellm.get_api_base(model=model, optional_params=litellm_params)
messages = kwargs.get("messages", "")
messages = kwargs.get("messages", None)
# if messages does not exist fallback to "input"
if messages is None:
messages = kwargs.get("input", None)

# only use first 100 chars for alerting
_messages = str(messages)[:100]

@@ -282,7 +285,10 @@ class ProxyLogging:
):
if request_data is not None:
model = request_data.get("model", "")
messages = request_data.get("messages", "")
messages = request_data.get("messages", None)
if messages is None:
# if messages does not exist fallback to "input"
messages = request_data.get("input", None)
trace_id = request_data.get("metadata", {}).get(
"trace_id", None
) # get langfuse trace id

@@ -26,7 +26,12 @@ from litellm.llms.custom_httpx.azure_dall_e_2 import (
CustomHTTPTransport,
AsyncCustomHTTPTransport,
)
from litellm.utils import ModelResponse, CustomStreamWrapper, get_utc_datetime
from litellm.utils import (
ModelResponse,
CustomStreamWrapper,
get_utc_datetime,
calculate_max_parallel_requests,
)
import copy
from litellm._logging import verbose_router_logger
import logging

@@ -61,6 +66,7 @@ class Router:
num_retries: int = 0,
timeout: Optional[float] = None,
default_litellm_params={}, # default params for Router.chat.completion.create
default_max_parallel_requests: Optional[int] = None,
set_verbose: bool = False,
debug_level: Literal["DEBUG", "INFO"] = "INFO",
fallbacks: List = [],

@@ -198,6 +204,7 @@ class Router:
) # use a dual cache (Redis+In-Memory) for tracking cooldowns, usage, etc.

self.default_deployment = None # use this to track the users default deployment, when they want to use model = *
self.default_max_parallel_requests = default_max_parallel_requests

if model_list:
model_list = copy.deepcopy(model_list)

@@ -213,6 +220,7 @@ class Router:
) # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown
self.num_retries = num_retries or litellm.num_retries or 0
self.timeout = timeout or litellm.request_timeout

self.retry_after = retry_after
self.routing_strategy = routing_strategy
self.fallbacks = fallbacks or litellm.fallbacks

@@ -298,7 +306,7 @@ class Router:
else:
litellm.failure_callback = [self.deployment_callback_on_failure]
verbose_router_logger.info(
f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}"
f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter Redis Caching={self.cache.redis_cache}"
)
self.routing_strategy_args = routing_strategy_args

@@ -496,7 +504,9 @@ class Router:
)

rpm_semaphore = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="rpm_client"
deployment=deployment,
kwargs=kwargs,
client_type="max_parallel_requests",
)

if rpm_semaphore is not None and isinstance(

@@ -681,7 +691,9 @@ class Router:

### CONCURRENCY-SAFE RPM CHECKS ###
rpm_semaphore = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="rpm_client"
deployment=deployment,
kwargs=kwargs,
client_type="max_parallel_requests",
)

if rpm_semaphore is not None and isinstance(

@@ -803,7 +815,9 @@ class Router:

### CONCURRENCY-SAFE RPM CHECKS ###
rpm_semaphore = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="rpm_client"
deployment=deployment,
kwargs=kwargs,
client_type="max_parallel_requests",
)

if rpm_semaphore is not None and isinstance(

@@ -1049,7 +1063,9 @@ class Router:
)

rpm_semaphore = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="rpm_client"
deployment=deployment,
kwargs=kwargs,
client_type="max_parallel_requests",
)

if rpm_semaphore is not None and isinstance(

@@ -1243,7 +1259,9 @@ class Router:

### CONCURRENCY-SAFE RPM CHECKS ###
rpm_semaphore = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="rpm_client"
deployment=deployment,
kwargs=kwargs,
client_type="max_parallel_requests",
)

if rpm_semaphore is not None and isinstance(

@@ -1862,17 +1880,23 @@ class Router:
model_id = model["model_info"]["id"]
# ### IF RPM SET - initialize a semaphore ###
rpm = litellm_params.get("rpm", None)
if rpm:
semaphore = asyncio.Semaphore(rpm)
cache_key = f"{model_id}_rpm_client"
tpm = litellm_params.get("tpm", None)
max_parallel_requests = litellm_params.get("max_parallel_requests", None)
calculated_max_parallel_requests = calculate_max_parallel_requests(
rpm=rpm,
max_parallel_requests=max_parallel_requests,
tpm=tpm,
default_max_parallel_requests=self.default_max_parallel_requests,
)
if calculated_max_parallel_requests:
semaphore = asyncio.Semaphore(calculated_max_parallel_requests)
cache_key = f"{model_id}_max_parallel_requests_client"
self.cache.set_cache(
key=cache_key,
value=semaphore,
local_only=True,
)

# print("STORES SEMAPHORE IN CACHE")

#### for OpenAI / Azure we need to initalize the Client for High Traffic ########
custom_llm_provider = litellm_params.get("custom_llm_provider")
custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""

@@ -2537,8 +2561,8 @@ class Router:
The appropriate client based on the given client_type and kwargs.
"""
model_id = deployment["model_info"]["id"]
if client_type == "rpm_client":
cache_key = "{}_rpm_client".format(model_id)
if client_type == "max_parallel_requests":
cache_key = "{}_max_parallel_requests_client".format(model_id)
client = self.cache.get_cache(key=cache_key, local_only=True)
return client
elif client_type == "async":

@@ -2778,6 +2802,7 @@ class Router:
"""
if (
self.routing_strategy != "usage-based-routing-v2"
and self.routing_strategy != "simple-shuffle"
): # prevent regressions for other routing strategies, that don't have async get available deployments implemented.
return self.get_available_deployment(
model=model,

@@ -2828,6 +2853,25 @@ class Router:
messages=messages,
input=input,
)
elif self.routing_strategy == "simple-shuffle":
# if users pass rpm or tpm, we do a random weighted pick - based on rpm/tpm
############## Check if we can do a RPM/TPM based weighted pick #################
rpm = healthy_deployments[0].get("litellm_params").get("rpm", None)
if rpm is not None:
# use weight-random pick if rpms provided
rpms = [m["litellm_params"].get("rpm", 0) for m in healthy_deployments]
verbose_router_logger.debug(f"\nrpms {rpms}")
total_rpm = sum(rpms)
weights = [rpm / total_rpm for rpm in rpms]
verbose_router_logger.debug(f"\n weights {weights}")
# Perform weighted random pick
selected_index = random.choices(range(len(rpms)), weights=weights)[0]
verbose_router_logger.debug(f"\n selected index, {selected_index}")
deployment = healthy_deployments[selected_index]
verbose_router_logger.info(
f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}"
)
return deployment or deployment[0]

if deployment is None:
verbose_router_logger.info(

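For readers new to the pattern used above: each deployment gets an `asyncio.Semaphore` sized by `calculate_max_parallel_requests`, and requests hold it for the duration of the call. A minimal stand-alone sketch of that flow (not the Router's exact code; the sleep stands in for the actual LLM request):

```python
import asyncio

# A per-deployment semaphore caps in-flight calls; `limit` would come from
# calculate_max_parallel_requests() in the real router.
async def call_with_limit(semaphore: asyncio.Semaphore, i: int) -> int:
    async with semaphore:  # waits here once `limit` calls are already running
        await asyncio.sleep(0.1)  # pretend to make the request
        return i

async def main() -> None:
    limit = 2  # e.g. max_parallel_requests / rpm / tpm-derived value
    semaphore = asyncio.Semaphore(limit)
    results = await asyncio.gather(*(call_with_limit(semaphore, i) for i in range(6)))
    print(results)  # all 6 complete, but never more than 2 at once

asyncio.run(main())
```
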
@@ -407,13 +407,15 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
tpm_keys.append(tpm_key)
rpm_keys.append(rpm_key)

tpm_values = await self.router_cache.async_batch_get_cache(
keys=tpm_keys
) # [1, 2, None, ..]
rpm_values = await self.router_cache.async_batch_get_cache(
keys=rpm_keys
combined_tpm_rpm_keys = tpm_keys + rpm_keys

combined_tpm_rpm_values = await self.router_cache.async_batch_get_cache(
keys=combined_tpm_rpm_keys
) # [1, 2, None, ..]

tpm_values = combined_tpm_rpm_values[: len(tpm_keys)]
rpm_values = combined_tpm_rpm_values[len(tpm_keys) :]

return self._common_checks_available_deployment(
model_group=model_group,
healthy_deployments=healthy_deployments,

@@ -269,6 +269,30 @@ def test_bedrock_claude_3_tool_calling():
assert isinstance(
response.choices[0].message.tool_calls[0].function.arguments, str
)
messages.append(
response.choices[0].message.model_dump()
) # Add assistant tool invokes
tool_result = (
'{"location": "Boston", "temperature": "72", "unit": "fahrenheit"}'
)
# Add user submitted tool results in the OpenAI format
messages.append(
{
"tool_call_id": response.choices[0].message.tool_calls[0].id,
"role": "tool",
"name": response.choices[0].message.tool_calls[0].function.name,
"content": tool_result,
}
)
# In the second response, Claude should deduce answer from tool results
second_response = completion(
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
messages=messages,
tools=tools,
tool_choice="auto",
)
print(f"second response: {second_response}")
assert isinstance(second_response.choices[0].message.content, str)
except RateLimitError:
pass
except Exception as e:

@@ -120,6 +120,15 @@ async def test_new_user_response(prisma_client):
await litellm.proxy.proxy_server.prisma_client.connect()
from litellm.proxy.proxy_server import user_api_key_cache

await new_team(
NewTeamRequest(
team_id="ishaan-special-team",
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
),
)

_response = await new_user(
data=NewUserRequest(
models=["azure-gpt-3.5"],

@@ -999,10 +1008,32 @@ def test_generate_and_update_key(prisma_client):

async def test():
await litellm.proxy.proxy_server.prisma_client.connect()

# create team "litellm-core-infra@gmail.com""
print("creating team litellm-core-infra@gmail.com")
await new_team(
NewTeamRequest(
team_id="litellm-core-infra@gmail.com",
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
),
)

await new_team(
NewTeamRequest(
team_id="ishaan-special-team",
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
),
)

request = NewUserRequest(
metadata={"team": "litellm-team3", "project": "litellm-project3"},
metadata={"project": "litellm-project3"},
team_id="litellm-core-infra@gmail.com",
)

key = await new_user(request)
print(key)

@@ -1015,7 +1046,6 @@ def test_generate_and_update_key(prisma_client):
print("\n info for key=", result["info"])
assert result["info"]["max_parallel_requests"] == None
assert result["info"]["metadata"] == {
"team": "litellm-team3",
"project": "litellm-project3",
}
assert result["info"]["team_id"] == "litellm-core-infra@gmail.com"

@@ -1037,7 +1067,7 @@ def test_generate_and_update_key(prisma_client):
# update the team id
response2 = await update_key_fn(
request=Request,
data=UpdateKeyRequest(key=generated_key, team_id="ishaan"),
data=UpdateKeyRequest(key=generated_key, team_id="ishaan-special-team"),
)
print("response2=", response2)

@@ -1048,11 +1078,10 @@ def test_generate_and_update_key(prisma_client):
print("\n info for key=", result["info"])
assert result["info"]["max_parallel_requests"] == None
assert result["info"]["metadata"] == {
"team": "litellm-team3",
"project": "litellm-project3",
}
assert result["info"]["models"] == ["ada", "babbage", "curie", "davinci"]
assert result["info"]["team_id"] == "ishaan"
assert result["info"]["team_id"] == "ishaan-special-team"

# cleanup - delete key
delete_key_request = KeyRequest(keys=[generated_key])

@@ -1941,6 +1970,15 @@ async def test_master_key_hashing(prisma_client):
await litellm.proxy.proxy_server.prisma_client.connect()
from litellm.proxy.proxy_server import user_api_key_cache

await new_team(
NewTeamRequest(
team_id="ishaans-special-team",
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
),
)

_response = await new_user(
data=NewUserRequest(
models=["azure-gpt-3.5"],

@@ -81,7 +81,7 @@ def test_async_fallbacks(caplog):
# Define the expected log messages
# - error request, falling back notice, success notice
expected_logs = [
"Intialized router with Routing strategy: simple-shuffle\n\nRouting fallbacks: [{'gpt-3.5-turbo': ['azure/gpt-3.5-turbo']}]\n\nRouting context window fallbacks: None",
"Intialized router with Routing strategy: simple-shuffle\n\nRouting fallbacks: [{'gpt-3.5-turbo': ['azure/gpt-3.5-turbo']}]\n\nRouting context window fallbacks: None\n\nRouter Redis Caching=None",
"litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m",
"Falling back to model_group = azure/gpt-3.5-turbo",
"litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",

litellm/tests/test_router_max_parallel_requests.py (new file, 115 lines)

@@ -0,0 +1,115 @@
# What is this?
## Unit tests for the max_parallel_requests feature on Router
import sys, os, time, inspect, asyncio, traceback
from datetime import datetime
import pytest

sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.utils import calculate_max_parallel_requests
from typing import Optional

"""
- only rpm
- only tpm
- only max_parallel_requests
- max_parallel_requests + rpm
- max_parallel_requests + tpm
- max_parallel_requests + tpm + rpm
"""


max_parallel_requests_values = [None, 10]
tpm_values = [None, 20, 300000]
rpm_values = [None, 30]
default_max_parallel_requests = [None, 40]


@pytest.mark.parametrize(
    "max_parallel_requests, tpm, rpm, default_max_parallel_requests",
    [
        (mp, tp, rp, dmp)
        for mp in max_parallel_requests_values
        for tp in tpm_values
        for rp in rpm_values
        for dmp in default_max_parallel_requests
    ],
)
def test_scenario(max_parallel_requests, tpm, rpm, default_max_parallel_requests):
    calculated_max_parallel_requests = calculate_max_parallel_requests(
        max_parallel_requests=max_parallel_requests,
        rpm=rpm,
        tpm=tpm,
        default_max_parallel_requests=default_max_parallel_requests,
    )
    if max_parallel_requests is not None:
        assert max_parallel_requests == calculated_max_parallel_requests
    elif rpm is not None:
        assert rpm == calculated_max_parallel_requests
    elif tpm is not None:
        calculated_rpm = int(tpm / 1000 / 6)
        if calculated_rpm == 0:
            calculated_rpm = 1
        print(
            f"test calculated_rpm: {calculated_rpm}, calculated_max_parallel_requests={calculated_max_parallel_requests}"
        )
        assert calculated_rpm == calculated_max_parallel_requests
    elif default_max_parallel_requests is not None:
        assert calculated_max_parallel_requests == default_max_parallel_requests
    else:
        assert calculated_max_parallel_requests is None


@pytest.mark.parametrize(
    "max_parallel_requests, tpm, rpm, default_max_parallel_requests",
    [
        (mp, tp, rp, dmp)
        for mp in max_parallel_requests_values
        for tp in tpm_values
        for rp in rpm_values
        for dmp in default_max_parallel_requests
    ],
)
def test_setting_mpr_limits_per_model(
    max_parallel_requests, tpm, rpm, default_max_parallel_requests
):
    deployment = {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "gpt-3.5-turbo",
            "max_parallel_requests": max_parallel_requests,
            "tpm": tpm,
            "rpm": rpm,
        },
        "model_info": {"id": "my-unique-id"},
    }

    router = litellm.Router(
        model_list=[deployment],
        default_max_parallel_requests=default_max_parallel_requests,
    )

    mpr_client: Optional[asyncio.Semaphore] = router._get_client(
        deployment=deployment,
        kwargs={},
        client_type="max_parallel_requests",
    )

    if max_parallel_requests is not None:
        assert max_parallel_requests == mpr_client._value
    elif rpm is not None:
        assert rpm == mpr_client._value
    elif tpm is not None:
        calculated_rpm = int(tpm / 1000 / 6)
        if calculated_rpm == 0:
            calculated_rpm = 1
        print(
            f"test calculated_rpm: {calculated_rpm}, calculated_max_parallel_requests={mpr_client._value}"
        )
        assert calculated_rpm == mpr_client._value
    elif default_max_parallel_requests is not None:
        assert mpr_client._value == default_max_parallel_requests
    else:
        assert mpr_client is None

    # raise Exception("it worked!")

@@ -5434,6 +5434,49 @@ def get_optional_params(
    return optional_params


def calculate_max_parallel_requests(
    max_parallel_requests: Optional[int],
    rpm: Optional[int],
    tpm: Optional[int],
    default_max_parallel_requests: Optional[int],
) -> Optional[int]:
    """
    Returns the max parallel requests to send to a deployment.

    Used in semaphore for async requests on router.

    Parameters:
    - max_parallel_requests - Optional[int] - max_parallel_requests allowed for that deployment
    - rpm - Optional[int] - requests per minute allowed for that deployment
    - tpm - Optional[int] - tokens per minute allowed for that deployment
    - default_max_parallel_requests - Optional[int] - default_max_parallel_requests allowed for any deployment

    Returns:
    - int or None (if all params are None)

    Order:
    max_parallel_requests > rpm > tpm / 6 (azure formula) > default max_parallel_requests

    Azure RPM formula:
    6 rpm per 1000 TPM
    https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits

    """
    if max_parallel_requests is not None:
        return max_parallel_requests
    elif rpm is not None:
        return rpm
    elif tpm is not None:
        calculated_rpm = int(tpm / 1000 / 6)
        if calculated_rpm == 0:
            calculated_rpm = 1
        return calculated_rpm
    elif default_max_parallel_requests is not None:
        return default_max_parallel_requests
    return None


def get_api_base(model: str, optional_params: dict) -> Optional[str]:
    """
    Returns the api base used for calling the model.

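To make the precedence order documented in `calculate_max_parallel_requests` above concrete, here is a small illustrative run-through (the input values are made up):

```python
from litellm.utils import calculate_max_parallel_requests

# max_parallel_requests wins when set explicitly
assert calculate_max_parallel_requests(50, rpm=10, tpm=300000, default_max_parallel_requests=5) == 50

# otherwise rpm is used directly
assert calculate_max_parallel_requests(None, rpm=10, tpm=300000, default_max_parallel_requests=5) == 10

# otherwise tpm is converted via the Azure heuristic (6 rpm per 1000 TPM): 300000 / 1000 / 6 = 50
assert calculate_max_parallel_requests(None, rpm=None, tpm=300000, default_max_parallel_requests=5) == 50

# otherwise fall back to the router-level default, else None
assert calculate_max_parallel_requests(None, rpm=None, tpm=None, default_max_parallel_requests=5) == 5
assert calculate_max_parallel_requests(None, rpm=None, tpm=None, default_max_parallel_requests=None) is None
```
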
@@ -96,9 +96,9 @@ litellm_settings:

router_settings:
routing_strategy: usage-based-routing-v2
redis_host: os.environ/REDIS_HOST
redis_password: os.environ/REDIS_PASSWORD
redis_port: os.environ/REDIS_PORT
# redis_host: os.environ/REDIS_HOST
# redis_password: os.environ/REDIS_PASSWORD
# redis_port: os.environ/REDIS_PORT
enable_pre_call_checks: true

general_settings:

@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.35.17"
version = "1.35.18"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"

@@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.35.17"
version = "1.35.18"
version_files = [
"pyproject.toml:^version"
]

@@ -14,6 +14,24 @@ sys.path.insert(
import litellm


async def generate_team(session):
url = "http://0.0.0.0:4000/team/new"
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
data = {
"team_id": "litellm-dashboard",
}

async with session.post(url, headers=headers, json=data) as response:
status = response.status
response_text = await response.text()

print(f"Response (Status code: {status}):")
print(response_text)
print()
_json_response = await response.json()
return _json_response


async def generate_user(
session,
user_role="app_owner",

@@ -668,7 +686,7 @@ async def test_key_rate_limit():


@pytest.mark.asyncio
async def test_key_delete():
async def test_key_delete_ui():
"""
Admin UI flow - DO NOT DELETE
-> Create a key with user_id = "ishaan"

@@ -680,6 +698,8 @@ async def test_key_delete():
key = key_gen["key"]

# generate a admin UI key
team = await generate_team(session=session)
print("generated team: ", team)
admin_ui_key = await generate_user(session=session, user_role="proxy_admin")
print(
"trying to delete key=",

@@ -260,7 +260,10 @@ async def test_chat_completion_ratelimit():
await asyncio.gather(*tasks)
pytest.fail("Expected at least 1 call to fail")
except Exception as e:
if "Request did not return a 200 status code: 429" in str(e):
pass
else:
pytest.fail(f"Wrong error received - {str(e)}")


@pytest.mark.asyncio

File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
[minified single-line Next.js build artifact (LiteLLM Dashboard HTML) omitted, truncated in this view; as in the earlier copy of this file, the change swaps buildId "Oe7aA-U7OV9Y13gspREJQ" for "Vjlnu8AomhCFg4fkGtcUs" and page chunk "page-93ac11fb17dce9d6.js" for "page-6ba29bc4256320f4.js"]
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[16586,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-93ac11fb17dce9d6.js"],""]
|
||||
3:I[65249,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-6ba29bc4256320f4.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["Oe7aA-U7OV9Y13gspREJQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["Vjlnu8AomhCFg4fkGtcUs",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -9,6 +9,7 @@ import Teams from "@/components/teams";
|
|||
import AdminPanel from "@/components/admins";
|
||||
import Settings from "@/components/settings";
|
||||
import GeneralSettings from "@/components/general_settings";
|
||||
import APIRef from "@/components/api_ref";
|
||||
import ChatUI from "@/components/chat_ui";
|
||||
import Sidebar from "../components/leftnav";
|
||||
import Usage from "../components/usage";
|
||||
|
@ -165,6 +166,8 @@ const CreateKeyPage = () => {
|
|||
accessToken={accessToken}
|
||||
showSSOBanner={showSSOBanner}
|
||||
/>
|
||||
) : page == "api_ref" ? (
|
||||
<APIRef/>
|
||||
) : page == "settings" ? (
|
||||
<Settings
|
||||
userID={userID}
|
||||
|
|
152
ui/litellm-dashboard/src/components/api_ref.tsx
Normal file
|
@ -0,0 +1,152 @@
|
|||
"use client";
|
||||
import React, { useEffect, useState } from "react";
|
||||
import {
|
||||
Badge,
|
||||
Card,
|
||||
Table,
|
||||
Metric,
|
||||
TableBody,
|
||||
TableCell,
|
||||
TableHead,
|
||||
TableHeaderCell,
|
||||
TableRow,
|
||||
Text,
|
||||
Title,
|
||||
Icon,
|
||||
Accordion,
|
||||
AccordionBody,
|
||||
AccordionHeader,
|
||||
List,
|
||||
ListItem,
|
||||
Tab,
|
||||
TabGroup,
|
||||
TabList,
|
||||
TabPanel,
|
||||
TabPanels,
|
||||
Grid,
|
||||
} from "@tremor/react";
|
||||
import { Statistic } from "antd"
|
||||
import { modelAvailableCall } from "./networking";
|
||||
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
|
||||
|
||||
|
||||
const APIRef = ({}) => {
|
||||
return (
|
||||
<>
|
||||
<Grid className="gap-2 p-8 h-[80vh] w-full mt-2">
|
||||
<div className="mb-5">
|
||||
<p className="text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">OpenAI Compatible Proxy: API Reference</p>
|
||||
<Text className="mt-2 mb-2">LiteLLM is OpenAI compatible. This means your API key works with the OpenAI SDK; just replace the base_url to point at your LiteLLM proxy. Examples below.</Text>
|
||||
|
||||
<TabGroup>
|
||||
<TabList>
|
||||
<Tab>OpenAI Python SDK</Tab>
|
||||
<Tab>LlamaIndex</Tab>
|
||||
<Tab>Langchain Py</Tab>
|
||||
</TabList>
|
||||
<TabPanels>
|
||||
<TabPanel>
|
||||
<SyntaxHighlighter language="python">
|
||||
{`
|
||||
import openai
|
||||
client = openai.OpenAI(
|
||||
api_key="your_api_key",
|
||||
base_url="http://0.0.0.0:4000" # LiteLLM Proxy is OpenAI compatible, Read More: https://docs.litellm.ai/docs/proxy/user_keys
|
||||
)
|
||||
|
||||
response = client.chat.completions.create(
|
||||
model="gpt-3.5-turbo", # model to send to the proxy
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "this is a test request, write a short poem"
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
print(response)
|
||||
`}
|
||||
</SyntaxHighlighter>
|
||||
</TabPanel>
|
||||
<TabPanel>
|
||||
<SyntaxHighlighter language="python">
|
||||
{`
|
||||
import os, dotenv
|
||||
|
||||
from llama_index.llms import AzureOpenAI
|
||||
from llama_index.embeddings import AzureOpenAIEmbedding
|
||||
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
|
||||
|
||||
llm = AzureOpenAI(
|
||||
engine="azure-gpt-3.5", # model_name on litellm proxy
|
||||
temperature=0.0,
|
||||
azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint
|
||||
api_key="sk-1234", # litellm proxy API Key
|
||||
api_version="2023-07-01-preview",
|
||||
)
|
||||
|
||||
embed_model = AzureOpenAIEmbedding(
|
||||
deployment_name="azure-embedding-model",
|
||||
azure_endpoint="http://0.0.0.0:4000",
|
||||
api_key="sk-1234",
|
||||
api_version="2023-07-01-preview",
|
||||
)
|
||||
|
||||
|
||||
documents = SimpleDirectoryReader("llama_index_data").load_data()
|
||||
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
|
||||
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
|
||||
|
||||
query_engine = index.as_query_engine()
|
||||
response = query_engine.query("What did the author do growing up?")
|
||||
print(response)
|
||||
|
||||
`}
|
||||
</SyntaxHighlighter>
|
||||
</TabPanel>
|
||||
<TabPanel>
|
||||
<SyntaxHighlighter language="python">
|
||||
{`
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.prompts.chat import (
|
||||
ChatPromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
SystemMessagePromptTemplate,
|
||||
)
|
||||
from langchain.schema import HumanMessage, SystemMessage
|
||||
|
||||
chat = ChatOpenAI(
|
||||
openai_api_base="http://0.0.0.0:4000",
|
||||
model = "gpt-3.5-turbo",
|
||||
temperature=0.1
|
||||
)
|
||||
|
||||
messages = [
|
||||
SystemMessage(
|
||||
content="You are a helpful assistant that im using to make a test request to."
|
||||
),
|
||||
HumanMessage(
|
||||
content="test from litellm. tell me why it's amazing in 1 sentence"
|
||||
),
|
||||
]
|
||||
response = chat(messages)
|
||||
|
||||
print(response)
|
||||
|
||||
`}
|
||||
</SyntaxHighlighter>
|
||||
</TabPanel>
|
||||
</TabPanels>
|
||||
</TabGroup>
|
||||
|
||||
|
||||
</div>
|
||||
</Grid>
|
||||
|
||||
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
export default APIRef;
|
||||
|
|
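The new API Reference tab above only shows non-streaming snippets. As a supplementary sketch (not part of this diff), the same OpenAI-compatible endpoint can also be called with streaming enabled; the base_url, api_key, and model name below are assumed from the snippets above, not defined by this change.

```python
# Illustrative sketch only: streaming a chat completion through a LiteLLM proxy
# using the OpenAI Python SDK (v1.x). Proxy URL, key, and model are assumptions
# carried over from the API Reference examples above.
import openai

client = openai.OpenAI(
    api_key="your_api_key",          # LiteLLM virtual key
    base_url="http://0.0.0.0:4000",  # LiteLLM proxy base URL
)

stream = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
    stream=True,  # receive tokens incrementally instead of one final message
)

for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
```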
@ -13,12 +13,12 @@ import {
|
|||
TabGroup,
|
||||
TabList,
|
||||
TabPanel,
|
||||
TabPanels,
|
||||
Metric,
|
||||
Col,
|
||||
Text,
|
||||
SelectItem,
|
||||
TextInput,
|
||||
TabPanels,
|
||||
Button,
|
||||
} from "@tremor/react";
|
||||
|
||||
|
@ -201,7 +201,6 @@ const ChatUI: React.FC<ChatUIProps> = ({
|
|||
<TabGroup>
|
||||
<TabList>
|
||||
<Tab>Chat</Tab>
|
||||
<Tab>API Reference</Tab>
|
||||
</TabList>
|
||||
|
||||
<TabPanels>
|
||||
|
@ -272,124 +271,7 @@ const ChatUI: React.FC<ChatUIProps> = ({
|
|||
</div>
|
||||
</div>
|
||||
</TabPanel>
|
||||
<TabPanel>
|
||||
<TabGroup>
|
||||
<TabList>
|
||||
<Tab>OpenAI Python SDK</Tab>
|
||||
<Tab>LlamaIndex</Tab>
|
||||
<Tab>Langchain Py</Tab>
|
||||
</TabList>
|
||||
<TabPanels>
|
||||
<TabPanel>
|
||||
<SyntaxHighlighter language="python">
|
||||
{`
|
||||
import openai
|
||||
client = openai.OpenAI(
|
||||
api_key="your_api_key",
|
||||
base_url="http://0.0.0.0:4000" # proxy base url
|
||||
)
|
||||
|
||||
response = client.chat.completions.create(
|
||||
model="gpt-3.5-turbo", # model to use from Models Tab
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "this is a test request, write a short poem"
|
||||
}
|
||||
],
|
||||
extra_body={
|
||||
"metadata": {
|
||||
"generation_name": "ishaan-generation-openai-client",
|
||||
"generation_id": "openai-client-gen-id22",
|
||||
"trace_id": "openai-client-trace-id22",
|
||||
"trace_user_id": "openai-client-user-id2"
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
print(response)
|
||||
`}
|
||||
</SyntaxHighlighter>
|
||||
</TabPanel>
|
||||
<TabPanel>
|
||||
<SyntaxHighlighter language="python">
|
||||
{`
|
||||
import os, dotenv
|
||||
|
||||
from llama_index.llms import AzureOpenAI
|
||||
from llama_index.embeddings import AzureOpenAIEmbedding
|
||||
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
|
||||
|
||||
llm = AzureOpenAI(
|
||||
engine="azure-gpt-3.5", # model_name on litellm proxy
|
||||
temperature=0.0,
|
||||
azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint
|
||||
api_key="sk-1234", # litellm proxy API Key
|
||||
api_version="2023-07-01-preview",
|
||||
)
|
||||
|
||||
embed_model = AzureOpenAIEmbedding(
|
||||
deployment_name="azure-embedding-model",
|
||||
azure_endpoint="http://0.0.0.0:4000",
|
||||
api_key="sk-1234",
|
||||
api_version="2023-07-01-preview",
|
||||
)
|
||||
|
||||
|
||||
documents = SimpleDirectoryReader("llama_index_data").load_data()
|
||||
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
|
||||
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
|
||||
|
||||
query_engine = index.as_query_engine()
|
||||
response = query_engine.query("What did the author do growing up?")
|
||||
print(response)
|
||||
|
||||
`}
|
||||
</SyntaxHighlighter>
|
||||
</TabPanel>
|
||||
<TabPanel>
|
||||
<SyntaxHighlighter language="python">
|
||||
{`
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.prompts.chat import (
|
||||
ChatPromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
SystemMessagePromptTemplate,
|
||||
)
|
||||
from langchain.schema import HumanMessage, SystemMessage
|
||||
|
||||
chat = ChatOpenAI(
|
||||
openai_api_base="http://0.0.0.0:8000",
|
||||
model = "gpt-3.5-turbo",
|
||||
temperature=0.1,
|
||||
extra_body={
|
||||
"metadata": {
|
||||
"generation_name": "ishaan-generation-langchain-client",
|
||||
"generation_id": "langchain-client-gen-id22",
|
||||
"trace_id": "langchain-client-trace-id22",
|
||||
"trace_user_id": "langchain-client-user-id2"
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
messages = [
|
||||
SystemMessage(
|
||||
content="You are a helpful assistant that im using to make a test request to."
|
||||
),
|
||||
HumanMessage(
|
||||
content="test from litellm. tell me why it's amazing in 1 sentence"
|
||||
),
|
||||
]
|
||||
response = chat(messages)
|
||||
|
||||
print(response)
|
||||
|
||||
`}
|
||||
</SyntaxHighlighter>
|
||||
</TabPanel>
|
||||
</TabPanels>
|
||||
</TabGroup>
|
||||
</TabPanel>
|
||||
</TabPanels>
|
||||
</TabGroup>
|
||||
</Card>
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
import React, { useState, useEffect, useRef } from "react";
|
||||
import { Button, TextInput, Grid, Col } from "@tremor/react";
|
||||
import { Card, Metric, Text, Title, Subtitle } from "@tremor/react";
|
||||
import { Card, Metric, Text, Title, Subtitle, Accordion, AccordionHeader, AccordionBody, } from "@tremor/react";
|
||||
import { CopyToClipboard } from 'react-copy-to-clipboard';
|
||||
import {
|
||||
Button as Button2,
|
||||
|
@ -147,6 +147,17 @@ const CreateKey: React.FC<CreateKeyProps> = ({
|
|||
mode="multiple"
|
||||
placeholder="Select models"
|
||||
style={{ width: "100%" }}
|
||||
onChange={(values) => {
|
||||
// Check if "All Team Models" is selected
|
||||
const isAllTeamModelsSelected = values.includes("all-team-models");
|
||||
|
||||
// If "All Team Models" is selected, deselect all other models
|
||||
if (isAllTeamModelsSelected) {
|
||||
const newValues = ["all-team-models"];
|
||||
// You can call the form's setFieldsValue method to update the value
|
||||
form.setFieldsValue({ models: newValues });
|
||||
}
|
||||
}}
|
||||
>
|
||||
<Option key="all-team-models" value="all-team-models">
|
||||
All Team Models
|
||||
|
@ -248,16 +259,153 @@ const CreateKey: React.FC<CreateKeyProps> = ({
|
|||
</>
|
||||
) : (
|
||||
<>
|
||||
<Form.Item label="Key Name" name="key_alias">
|
||||
<Form.Item
|
||||
label="Key Name"
|
||||
name="key_alias"
|
||||
rules={[{ required: true, message: 'Please input a key name' }]}
|
||||
help="required"
|
||||
>
|
||||
<Input />
|
||||
</Form.Item>
|
||||
<Form.Item label="Team ID (Contact Group)" name="team_id">
|
||||
<Input placeholder="default team (create a new team)" />
|
||||
<Form.Item
|
||||
label="Team ID"
|
||||
name="team_id"
|
||||
hidden={true}
|
||||
initialValue={team ? team["team_id"] : null}
|
||||
valuePropName="team_id"
|
||||
className="mt-8"
|
||||
>
|
||||
<Input value={team ? team["team_alias"] : ""} disabled />
|
||||
</Form.Item>
|
||||
|
||||
<Form.Item label="Description" name="description">
|
||||
<Input.TextArea placeholder="Enter description" rows={4} />
|
||||
<Form.Item
|
||||
label="Models"
|
||||
name="models"
|
||||
className="mb-12"
|
||||
rules={[{ required: true, message: 'Please select a model' }]}
|
||||
help="required"
|
||||
>
|
||||
<Select
|
||||
mode="multiple"
|
||||
placeholder="Select models"
|
||||
style={{ width: "100%" }}
|
||||
onChange={(values) => {
|
||||
const isAllTeamModelsSelected = values.includes("all-team-models");
|
||||
|
||||
if (isAllTeamModelsSelected) {
|
||||
const newValues = ["all-team-models"];
|
||||
form.setFieldsValue({ models: newValues });
|
||||
}
|
||||
}}
|
||||
|
||||
>
|
||||
<Option key="all-team-models" value="all-team-models">
|
||||
All Team Models
|
||||
</Option>
|
||||
{team && team.models ? (
|
||||
team.models.includes("all-proxy-models") ? (
|
||||
userModels.map((model: string) => (
|
||||
(
|
||||
<Option key={model} value={model}>
|
||||
{model}
|
||||
</Option>
|
||||
)
|
||||
))
|
||||
) : (
|
||||
team.models.map((model: string) => (
|
||||
<Option key={model} value={model}>
|
||||
{model}
|
||||
</Option>
|
||||
))
|
||||
)
|
||||
) : (
|
||||
userModels.map((model: string) => (
|
||||
<Option key={model} value={model}>
|
||||
{model}
|
||||
</Option>
|
||||
))
|
||||
)}
|
||||
|
||||
</Select>
|
||||
</Form.Item>
|
||||
|
||||
<Accordion className="mt-20 mb-8" >
|
||||
<AccordionHeader>
|
||||
<b>Optional Settings</b>
|
||||
</AccordionHeader>
|
||||
<AccordionBody>
|
||||
<Form.Item
|
||||
className="mt-8"
|
||||
label="Max Budget (USD)"
|
||||
name="max_budget"
|
||||
help={`Budget cannot exceed team max budget: $${team?.max_budget !== null && team?.max_budget !== undefined ? team?.max_budget : 'unlimited'}`}
|
||||
rules={[
|
||||
{
|
||||
validator: async (_, value) => {
|
||||
if (value && team && team.max_budget !== null && value > team.max_budget) {
|
||||
throw new Error(`Budget cannot exceed team max budget: $${team.max_budget}`);
|
||||
}
|
||||
},
|
||||
},
|
||||
]}
|
||||
>
|
||||
<InputNumber step={0.01} precision={2} width={200} />
|
||||
</Form.Item>
|
||||
<Form.Item
|
||||
className="mt-8"
|
||||
label="Reset Budget"
|
||||
name="budget_duration"
|
||||
help={`Team Reset Budget: ${team?.budget_duration !== null && team?.budget_duration !== undefined ? team?.budget_duration : 'None'}`}
|
||||
>
|
||||
<Select defaultValue={null} placeholder="n/a">
|
||||
<Select.Option value="24h">daily</Select.Option>
|
||||
<Select.Option value="30d">monthly</Select.Option>
|
||||
</Select>
|
||||
</Form.Item>
|
||||
<Form.Item
|
||||
className="mt-8"
|
||||
label="Tokens per minute Limit (TPM)"
|
||||
name="tpm_limit"
|
||||
help={`TPM cannot exceed team TPM limit: ${team?.tpm_limit !== null && team?.tpm_limit !== undefined ? team?.tpm_limit : 'unlimited'}`}
|
||||
rules={[
|
||||
{
|
||||
validator: async (_, value) => {
|
||||
if (value && team && team.tpm_limit !== null && value > team.tpm_limit) {
|
||||
throw new Error(`TPM limit cannot exceed team TPM limit: ${team.tpm_limit}`);
|
||||
}
|
||||
},
|
||||
},
|
||||
]}
|
||||
>
|
||||
<InputNumber step={1} width={400} />
|
||||
</Form.Item>
|
||||
<Form.Item
|
||||
className="mt-8"
|
||||
label="Requests per minute Limit (RPM)"
|
||||
name="rpm_limit"
|
||||
help={`RPM cannot exceed team RPM limit: ${team?.rpm_limit !== null && team?.rpm_limit !== undefined ? team?.rpm_limit : 'unlimited'}`}
|
||||
rules={[
|
||||
{
|
||||
validator: async (_, value) => {
|
||||
if (value && team && team.rpm_limit !== null && value > team.rpm_limit) {
|
||||
throw new Error(`RPM limit cannot exceed team RPM limit: ${team.rpm_limit}`);
|
||||
}
|
||||
},
|
||||
},
|
||||
]}
|
||||
>
|
||||
<InputNumber step={1} width={400} />
|
||||
</Form.Item>
|
||||
<Form.Item label="Expire Key (eg: 30s, 30h, 30d)" name="duration" className="mt-8">
|
||||
<Input />
|
||||
</Form.Item>
|
||||
<Form.Item label="Metadata" name="metadata">
|
||||
<Input.TextArea rows={4} placeholder="Enter metadata as JSON" />
|
||||
</Form.Item>
|
||||
|
||||
</AccordionBody>
|
||||
</Accordion>
|
||||
|
||||
</>
|
||||
)}
|
||||
<div style={{ textAlign: "right", marginTop: "10px" }}>
|
||||
|
|
|
@ -4,6 +4,7 @@ import { Select, SelectItem, Text, Title } from "@tremor/react";
|
|||
interface DashboardTeamProps {
|
||||
teams: Object[] | null;
|
||||
setSelectedTeam: React.Dispatch<React.SetStateAction<any | null>>;
|
||||
userRole: string | null;
|
||||
}
|
||||
|
||||
type TeamInterface = {
|
||||
|
@ -15,6 +16,7 @@ type TeamInterface = {
|
|||
const DashboardTeam: React.FC<DashboardTeamProps> = ({
|
||||
teams,
|
||||
setSelectedTeam,
|
||||
userRole,
|
||||
}) => {
|
||||
const defaultTeam: TeamInterface = {
|
||||
models: [],
|
||||
|
@ -25,19 +27,26 @@ const DashboardTeam: React.FC<DashboardTeamProps> = ({
|
|||
|
||||
const [value, setValue] = useState(defaultTeam);
|
||||
|
||||
const updatedTeams = teams ? [...teams, defaultTeam] : [defaultTeam];
|
||||
|
||||
let updatedTeams;
|
||||
if (userRole === "App User") {
|
||||
// Non-Admin SSO users should only see their own team - they should not see "Default Team"
|
||||
updatedTeams = teams;
|
||||
} else {
|
||||
updatedTeams = teams ? [...teams, defaultTeam] : [defaultTeam];
|
||||
}
|
||||
if (userRole === 'App User') return null;
|
||||
|
||||
return (
|
||||
<div className="mt-5 mb-5">
|
||||
<Title>Select Team</Title>
|
||||
|
||||
<Text>
|
||||
If you belong to multiple teams, this setting controls which team is
|
||||
used by default when creating new API Keys.
|
||||
If you belong to multiple teams, this setting controls which team is used by default when creating new API Keys.
|
||||
</Text>
|
||||
<Text className="mt-3 mb-3">
|
||||
<b>Default Team:</b> If no team_id is set for a key, it will be grouped under here.
|
||||
</Text>
|
||||
|
||||
{updatedTeams && updatedTeams.length > 0 ? (
|
||||
<Select defaultValue="0">
|
||||
{updatedTeams.map((team: any, index) => (
|
||||
|
|
|
@ -46,8 +46,8 @@ const Sidebar: React.FC<SidebarProps> = ({
|
|||
);
|
||||
}
|
||||
return (
|
||||
<Layout style={{ minHeight: "100vh", maxWidth: "120px" }}>
|
||||
<Sider width={120}>
|
||||
<Layout style={{ minHeight: "100vh", maxWidth: "130px" }}>
|
||||
<Sider width={130}>
|
||||
<Menu
|
||||
mode="inline"
|
||||
defaultSelectedKeys={defaultSelectedKey ? defaultSelectedKey : ["1"]}
|
||||
|
@ -63,11 +63,23 @@ const Sidebar: React.FC<SidebarProps> = ({
|
|||
Test Key
|
||||
</Text>
|
||||
</Menu.Item>
|
||||
|
||||
<Menu.Item key="11" onClick={() => setPage("api_ref")}>
|
||||
<Text>
|
||||
API Reference
|
||||
</Text>
|
||||
</Menu.Item>
|
||||
|
||||
{
|
||||
userRole == "Admin" ? (
|
||||
<Menu.Item key="2" onClick={() => setPage("models")}>
|
||||
<Text>
|
||||
Models
|
||||
</Text>
|
||||
</Menu.Item>
|
||||
) : null
|
||||
}
|
||||
|
||||
{userRole == "Admin" ? (
|
||||
<Menu.Item key="6" onClick={() => setPage("teams")}>
|
||||
<Text>
|
||||
|
@ -75,11 +87,18 @@ const Sidebar: React.FC<SidebarProps> = ({
|
|||
</Text>
|
||||
</Menu.Item>
|
||||
) : null}
|
||||
|
||||
{
|
||||
userRole == "Admin" ? (
|
||||
<Menu.Item key="4" onClick={() => setPage("usage")}>
|
||||
<Text>
|
||||
Usage
|
||||
</Text>
|
||||
</Menu.Item>
|
||||
|
||||
) : null
|
||||
}
|
||||
|
||||
{userRole == "Admin" ? (
|
||||
<Menu.Item key="5" onClick={() => setPage("users")}>
|
||||
<Text>
|
||||
|
@ -87,16 +106,27 @@ const Sidebar: React.FC<SidebarProps> = ({
|
|||
</Text>
|
||||
</Menu.Item>
|
||||
) : null}
|
||||
|
||||
{
|
||||
userRole == "Admin" ? (
|
||||
<Menu.Item key="8" onClick={() => setPage("settings")}>
|
||||
<Text>
|
||||
Integrations
|
||||
</Text>
|
||||
</Menu.Item>
|
||||
) : null
|
||||
}
|
||||
|
||||
{
|
||||
userRole == "Admin" ? (
|
||||
<Menu.Item key="9" onClick={() => setPage("general-settings")}>
|
||||
<Text>
|
||||
Settings
|
||||
</Text>
|
||||
</Menu.Item>
|
||||
) : null
|
||||
}
|
||||
|
||||
{userRole == "Admin" ? (
|
||||
<Menu.Item key="7" onClick={() => setPage("admin-panel")}>
|
||||
<Text>
|
||||
|
|
|
@ -296,6 +296,9 @@ export const userInfoCall = async (
|
|||
if (userRole == "App Owner" && userID) {
|
||||
url = `${url}?user_id=${userID}`;
|
||||
}
|
||||
if (userRole == "App User" && userID) {
|
||||
url = `${url}?user_id=${userID}`;
|
||||
}
|
||||
console.log("in userInfoCall viewAll=", viewAll);
|
||||
if (viewAll && page_size && (page != null) && (page != undefined)) {
|
||||
url = `${url}?view_all=true&page=${page}&page_size=${page_size}`;
|
||||
|
|
|
@ -5,6 +5,7 @@ import { Grid, Col, Card, Text, Title } from "@tremor/react";
|
|||
import CreateKey from "./create_key_button";
|
||||
import ViewKeyTable from "./view_key_table";
|
||||
import ViewUserSpend from "./view_user_spend";
|
||||
import ViewUserTeam from "./view_user_team";
|
||||
import DashboardTeam from "./dashboard_default_team";
|
||||
import { useSearchParams, useRouter } from "next/navigation";
|
||||
import { jwtDecode } from "jwt-decode";
|
||||
|
@ -232,11 +233,19 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
|
|||
<div className="w-full mx-4">
|
||||
<Grid numItems={1} className="gap-2 p-8 h-[75vh] w-full mt-2">
|
||||
<Col numColSpan={1}>
|
||||
<ViewUserTeam
|
||||
userID={userID}
|
||||
userRole={userRole}
|
||||
selectedTeam={selectedTeam ? selectedTeam : null}
|
||||
accessToken={accessToken}
|
||||
/>
|
||||
<ViewUserSpend
|
||||
userID={userID}
|
||||
userRole={userRole}
|
||||
accessToken={accessToken}
|
||||
userSpend={teamSpend}
|
||||
selectedTeam = {selectedTeam ? selectedTeam : null}
|
||||
|
||||
/>
|
||||
|
||||
<ViewKeyTable
|
||||
|
@ -257,7 +266,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
|
|||
data={keys}
|
||||
setData={setKeys}
|
||||
/>
|
||||
<DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} />
|
||||
<DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} userRole={userRole}/>
|
||||
</Col>
|
||||
</Grid>
|
||||
</div>
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
import React, { useEffect, useState } from "react";
|
||||
import { keyDeleteCall, getTotalSpendCall } from "./networking";
|
||||
import { StatusOnlineIcon, TrashIcon } from "@heroicons/react/outline";
|
||||
import { DonutChart } from "@tremor/react";
|
||||
import { Accordion, AccordionHeader, AccordionList, DonutChart } from "@tremor/react";
|
||||
import {
|
||||
Badge,
|
||||
Card,
|
||||
|
@ -16,9 +16,13 @@ import {
|
|||
Text,
|
||||
Title,
|
||||
Icon,
|
||||
AccordionBody,
|
||||
List,
|
||||
ListItem,
|
||||
|
||||
} from "@tremor/react";
|
||||
import { Statistic } from "antd"
|
||||
import { spendUsersCall } from "./networking";
|
||||
import { spendUsersCall, modelAvailableCall } from "./networking";
|
||||
|
||||
|
||||
// Define the props type
|
||||
|
@ -32,11 +36,13 @@ interface ViewUserSpendProps {
|
|||
userRole: string | null;
|
||||
accessToken: string | null;
|
||||
userSpend: number | null;
|
||||
selectedTeam: any | null;
|
||||
}
|
||||
const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessToken, userSpend }) => {
|
||||
const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessToken, userSpend, selectedTeam }) => {
|
||||
console.log(`userSpend: ${userSpend}`)
|
||||
let [spend, setSpend] = useState(userSpend !== null ? userSpend : 0.0);
|
||||
const [maxBudget, setMaxBudget] = useState(0.0);
|
||||
const [userModels, setUserModels] = useState([]);
|
||||
useEffect(() => {
|
||||
const fetchData = async () => {
|
||||
if (!accessToken || !userID || !userRole) {
|
||||
|
@ -62,9 +68,30 @@ const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessT
|
|||
}
|
||||
}
|
||||
};
|
||||
const fetchUserModels = async () => {
|
||||
try {
|
||||
if (userID === null || userRole === null) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (accessToken !== null) {
|
||||
const model_available = await modelAvailableCall(accessToken, userID, userRole);
|
||||
let available_model_names = model_available["data"].map(
|
||||
(element: { id: string }) => element.id
|
||||
);
|
||||
console.log("available_model_names:", available_model_names);
|
||||
setUserModels(available_model_names);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Error fetching user models:", error);
|
||||
}
|
||||
};
|
||||
|
||||
fetchUserModels();
|
||||
fetchData();
|
||||
}, [userRole, accessToken]);
|
||||
}, [userRole, accessToken, userID]);
|
||||
|
||||
|
||||
|
||||
useEffect(() => {
|
||||
if (userSpend !== null) {
|
||||
|
@ -72,18 +99,50 @@ const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessT
|
|||
}
|
||||
}, [userSpend])
|
||||
|
||||
// logic to decide what models to display
|
||||
let modelsToDisplay = [];
|
||||
if (selectedTeam && selectedTeam.models) {
|
||||
modelsToDisplay = selectedTeam.models;
|
||||
}
|
||||
|
||||
// check if "all-proxy-models" is in modelsToDisplay
|
||||
if (modelsToDisplay && modelsToDisplay.includes("all-proxy-models")) {
|
||||
console.log("user models:", userModels);
|
||||
modelsToDisplay = userModels;
|
||||
}
|
||||
|
||||
|
||||
const displayMaxBudget = maxBudget !== null ? `$${maxBudget} limit` : "No limit";
|
||||
|
||||
const roundedSpend = spend !== undefined ? spend.toFixed(4) : null;
|
||||
|
||||
console.log(`spend in view user spend: ${spend}`)
|
||||
return (
|
||||
<>
|
||||
<p className="text-tremor-default text-tremor-content dark:text-dark-tremor-content">Total Spend </p>
|
||||
<p className="text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">${roundedSpend}</p>
|
||||
|
||||
</>
|
||||
)
|
||||
<div className="flex items-center">
|
||||
<div>
|
||||
<p className="text-tremor-default text-tremor-content dark:text-dark-tremor-content">
|
||||
Total Spend{" "}
|
||||
</p>
|
||||
<p className="text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">
|
||||
${roundedSpend}
|
||||
</p>
|
||||
</div>
|
||||
<div className="ml-auto">
|
||||
<Accordion>
|
||||
<AccordionHeader>Models</AccordionHeader>
|
||||
<AccordionBody className="absolute right-0 z-10 bg-white p-2 shadow-lg max-w-xs">
|
||||
<List>
|
||||
{modelsToDisplay.map((model: string) => (
|
||||
<ListItem key={model}>
|
||||
<Text>{model}</Text>
|
||||
</ListItem>
|
||||
))}
|
||||
</List>
|
||||
</AccordionBody>
|
||||
</Accordion>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default ViewUserSpend;
|
||||
|
|
78
ui/litellm-dashboard/src/components/view_user_team.tsx
Normal file
|
@ -0,0 +1,78 @@
|
|||
"use client";
|
||||
import React, { useEffect, useState } from "react";
|
||||
import {
|
||||
Badge,
|
||||
Card,
|
||||
Table,
|
||||
Metric,
|
||||
TableBody,
|
||||
TableCell,
|
||||
TableHead,
|
||||
TableHeaderCell,
|
||||
TableRow,
|
||||
Text,
|
||||
Title,
|
||||
Icon,
|
||||
Accordion,
|
||||
AccordionBody,
|
||||
AccordionHeader,
|
||||
List,
|
||||
ListItem,
|
||||
} from "@tremor/react";
|
||||
import { Statistic } from "antd"
|
||||
import { modelAvailableCall } from "./networking";
|
||||
|
||||
|
||||
interface ViewUserTeamProps {
|
||||
userID: string | null;
|
||||
userRole: string | null;
|
||||
selectedTeam: any | null;
|
||||
accessToken: string | null;
|
||||
}
|
||||
const ViewUserTeam: React.FC<ViewUserTeamProps> = ({ userID, userRole, selectedTeam, accessToken}) => {
|
||||
const [userModels, setUserModels] = useState([]);
|
||||
useEffect(() => {
|
||||
const fetchUserModels = async () => {
|
||||
try {
|
||||
if (userID === null || userRole === null) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (accessToken !== null) {
|
||||
const model_available = await modelAvailableCall(accessToken, userID, userRole);
|
||||
let available_model_names = model_available["data"].map(
|
||||
(element: { id: string }) => element.id
|
||||
);
|
||||
console.log("available_model_names:", available_model_names);
|
||||
setUserModels(available_model_names);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Error fetching user models:", error);
|
||||
}
|
||||
};
|
||||
|
||||
fetchUserModels();
|
||||
}, [accessToken, userID, userRole]);
|
||||
|
||||
// logic to decide what models to display
|
||||
let modelsToDisplay = [];
|
||||
if (selectedTeam && selectedTeam.models) {
|
||||
modelsToDisplay = selectedTeam.models;
|
||||
}
|
||||
|
||||
// check if "all-proxy-models" is in modelsToDisplay
|
||||
if (modelsToDisplay && modelsToDisplay.includes("all-proxy-models")) {
|
||||
console.log("user models:", userModels);
|
||||
modelsToDisplay = userModels;
|
||||
}
|
||||
return (
|
||||
<>
|
||||
<div className="mb-5">
|
||||
<p className="text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">{selectedTeam?.team_alias}</p>
|
||||
</div>
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
export default ViewUserTeam;
|
||||
|