forked from phoenix/litellm-mirror

Merge branch 'BerriAI:main' into feature/watsonx-integration

commit a77537ddd4 (45 changed files with 1027 additions and 281 deletions)
@@ -279,7 +279,7 @@ router_settings:
 ```
 
 </TabItem>
 
-<TabItem value="simple-shuffle" label="(Default) Weighted Pick">
+<TabItem value="simple-shuffle" label="(Default) Weighted Pick (Async)">
 
 **Default** Picks a deployment based on the provided **Requests per minute (rpm) or Tokens per minute (tpm)**
@@ -19,7 +19,7 @@ class PrometheusLogger:
         **kwargs,
     ):
         try:
-            verbose_logger.debug(f"in init prometheus metrics")
+            print(f"in init prometheus metrics")
             from prometheus_client import Counter
 
             self.litellm_llm_api_failed_requests_metric = Counter(
@@ -44,9 +44,18 @@ class PrometheusServicesLogger:
        ) # store the prometheus histogram/counter we need to call for each field in payload
 
        for service in self.services:
-           histogram = self.create_histogram(service)
-           counter = self.create_counter(service)
-           self.payload_to_prometheus_map[service] = [histogram, counter]
+           histogram = self.create_histogram(service, type_of_request="latency")
+           counter_failed_request = self.create_counter(
+               service, type_of_request="failed_requests"
+           )
+           counter_total_requests = self.create_counter(
+               service, type_of_request="total_requests"
+           )
+           self.payload_to_prometheus_map[service] = [
+               histogram,
+               counter_failed_request,
+               counter_total_requests,
+           ]
 
        self.prometheus_to_amount_map: dict = (
            {}
@@ -74,26 +83,26 @@ class PrometheusServicesLogger:
                 return metric
         return None
 
-    def create_histogram(self, label: str):
-        metric_name = "litellm_{}_latency".format(label)
+    def create_histogram(self, service: str, type_of_request: str):
+        metric_name = "litellm_{}_{}".format(service, type_of_request)
         is_registered = self.is_metric_registered(metric_name)
         if is_registered:
             return self.get_metric(metric_name)
         return self.Histogram(
             metric_name,
-            "Latency for {} service".format(label),
-            labelnames=[label],
+            "Latency for {} service".format(service),
+            labelnames=[service],
         )
 
-    def create_counter(self, label: str):
-        metric_name = "litellm_{}_failed_requests".format(label)
+    def create_counter(self, service: str, type_of_request: str):
+        metric_name = "litellm_{}_{}".format(service, type_of_request)
         is_registered = self.is_metric_registered(metric_name)
         if is_registered:
             return self.get_metric(metric_name)
         return self.Counter(
             metric_name,
-            "Total failed requests for {} service".format(label),
-            labelnames=[label],
+            "Total {} for {} service".format(type_of_request, service),
+            labelnames=[service],
         )
 
     def observe_histogram(
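Taken together, the two hunks above move the metric-name scheme from a hardcoded suffix per helper to a `litellm_{service}_{type_of_request}` pattern, so each tracked service now gets one latency histogram and two counters. A small sketch of the names this yields (the service names are illustrative, not the actual contents of `self.services`):

```python
# Illustrative sketch of the metric names produced after this refactor.
# "redis" and "postgres" are example service names, not an exhaustive list.
def metric_name(service: str, type_of_request: str) -> str:
    return "litellm_{}_{}".format(service, type_of_request)

for service in ["redis", "postgres"]:
    print(metric_name(service, "latency"))          # Histogram
    print(metric_name(service, "failed_requests"))  # Counter
    print(metric_name(service, "total_requests"))   # Counter
```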
@@ -120,6 +129,8 @@ class PrometheusServicesLogger:
         if self.mock_testing:
             self.mock_testing_success_calls += 1
 
+        print(f"payload call type: {payload.call_type}")
+
         if payload.service.value in self.payload_to_prometheus_map:
             prom_objects = self.payload_to_prometheus_map[payload.service.value]
             for obj in prom_objects:
@@ -129,11 +140,19 @@ class PrometheusServicesLogger:
                         labels=payload.service.value,
                         amount=payload.duration,
                     )
+                elif isinstance(obj, self.Counter) and "total_requests" in obj._name:
+                    self.increment_counter(
+                        counter=obj,
+                        labels=payload.service.value,
+                        amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS
+                    )
 
     def service_failure_hook(self, payload: ServiceLoggerPayload):
         if self.mock_testing:
             self.mock_testing_failure_calls += 1
 
+        print(f"payload call type: {payload.call_type}")
+
         if payload.service.value in self.payload_to_prometheus_map:
             prom_objects = self.payload_to_prometheus_map[payload.service.value]
             for obj in prom_objects:
@@ -141,7 +160,7 @@ class PrometheusServicesLogger:
                     self.increment_counter(
                         counter=obj,
                         labels=payload.service.value,
-                        amount=1, # LOG ERROR COUNT TO PROMETHEUS
+                        amount=1, # LOG ERROR COUNT / TOTAL REQUESTS TO PROMETHEUS
                     )
 
     async def async_service_success_hook(self, payload: ServiceLoggerPayload):
@@ -151,6 +170,8 @@ class PrometheusServicesLogger:
         if self.mock_testing:
             self.mock_testing_success_calls += 1
 
+        print(f"payload call type: {payload.call_type}")
+
         if payload.service.value in self.payload_to_prometheus_map:
             prom_objects = self.payload_to_prometheus_map[payload.service.value]
             for obj in prom_objects:
@@ -160,12 +181,20 @@ class PrometheusServicesLogger:
                         labels=payload.service.value,
                         amount=payload.duration,
                     )
+                elif isinstance(obj, self.Counter) and "total_requests" in obj._name:
+                    self.increment_counter(
+                        counter=obj,
+                        labels=payload.service.value,
+                        amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS
+                    )
 
     async def async_service_failure_hook(self, payload: ServiceLoggerPayload):
         print(f"received error payload: {payload.error}")
         if self.mock_testing:
             self.mock_testing_failure_calls += 1
 
+        print(f"payload call type: {payload.call_type}")
+
         if payload.service.value in self.payload_to_prometheus_map:
             prom_objects = self.payload_to_prometheus_map[payload.service.value]
             for obj in prom_objects:
@@ -507,10 +507,11 @@ def construct_tool_use_system_prompt(
 ): # from https://github.com/anthropics/anthropic-cookbook/blob/main/function_calling/function_calling.ipynb
     tool_str_list = []
     for tool in tools:
+        tool_function = get_attribute_or_key(tool, "function")
         tool_str = construct_format_tool_for_claude_prompt(
-            tool["function"]["name"],
-            tool["function"].get("description", ""),
-            tool["function"].get("parameters", {}),
+            get_attribute_or_key(tool_function, "name"),
+            get_attribute_or_key(tool_function, "description", ""),
+            get_attribute_or_key(tool_function, "parameters", {}),
         )
         tool_str_list.append(tool_str)
     tool_use_system_prompt = (
@@ -634,7 +635,8 @@ def convert_to_anthropic_tool_result_xml(message: dict) -> str:
     </function_results>
     """
     name = message.get("name")
-    content = message.get("content")
+    content = message.get("content", "")
+    content = content.replace("<", "&lt;").replace(">", "&gt;").replace("&", "&amp;")
 
     # We can't determine from openai message format whether it's a successful or
     # error call result so default to the successful result template
@@ -655,13 +657,15 @@ def convert_to_anthropic_tool_result_xml(message: dict) -> str:
 def convert_to_anthropic_tool_invoke_xml(tool_calls: list) -> str:
     invokes = ""
     for tool in tool_calls:
-        if tool["type"] != "function":
+        if get_attribute_or_key(tool, "type") != "function":
             continue
 
-        tool_name = tool["function"]["name"]
+        tool_function = get_attribute_or_key(tool,"function")
+        tool_name = get_attribute_or_key(tool_function, "name")
+        tool_arguments = get_attribute_or_key(tool_function, "arguments")
         parameters = "".join(
             f"<{param}>{val}</{param}>\n"
-            for param, val in json.loads(tool["function"]["arguments"]).items()
+            for param, val in json.loads(tool_arguments).items()
         )
         invokes += (
             "<invoke>\n"
@@ -715,7 +719,7 @@ def anthropic_messages_pt_xml(messages: list):
                     {
                         "type": "text",
                         "text": (
-                            convert_to_anthropic_tool_result(messages[msg_i])
+                            convert_to_anthropic_tool_result_xml(messages[msg_i])
                             if messages[msg_i]["role"] == "tool"
                             else messages[msg_i]["content"]
                         ),
@@ -736,7 +740,7 @@ def anthropic_messages_pt_xml(messages: list):
             if messages[msg_i].get(
                 "tool_calls", []
             ): # support assistant tool invoke convertion
-                assistant_text += convert_to_anthropic_tool_invoke( # type: ignore
+                assistant_text += convert_to_anthropic_tool_invoke_xml( # type: ignore
                     messages[msg_i]["tool_calls"]
                 )
 
@@ -848,12 +852,12 @@ def convert_to_anthropic_tool_invoke(tool_calls: list) -> list:
     anthropic_tool_invoke = [
         {
             "type": "tool_use",
-            "id": tool["id"],
-            "name": tool["function"]["name"],
-            "input": json.loads(tool["function"]["arguments"]),
+            "id": get_attribute_or_key(tool, "id"),
+            "name": get_attribute_or_key(get_attribute_or_key(tool, "function"), "name"),
+            "input": json.loads(get_attribute_or_key(get_attribute_or_key(tool, "function"), "arguments")),
         }
         for tool in tool_calls
-        if tool["type"] == "function"
+        if get_attribute_or_key(tool, "type") == "function"
     ]
 
     return anthropic_tool_invoke
@@ -1074,7 +1078,8 @@ def cohere_message_pt(messages: list):
             tool_result = convert_openai_message_to_cohere_tool_result(message)
             tool_results.append(tool_result)
         else:
-            prompt += message["content"]
+            prompt += message["content"] + "\n\n"
+    prompt = prompt.rstrip()
     return prompt, tool_results
 
 
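Turns are now joined with a blank line and the trailing separator is stripped off the end. A tiny standalone illustration (message contents made up):

```python
# Standalone illustration of the new joining behavior in cohere_message_pt.
prompt = ""
for content in ["Hi", "Hello!", "What is the weather?"]:
    prompt += content + "\n\n"
prompt = prompt.rstrip()
print(repr(prompt))  # 'Hi\n\nHello!\n\nWhat is the weather?'
```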
@@ -1414,3 +1419,8 @@ def prompt_factory(
         return default_pt(
             messages=messages
         ) # default that covers Bloom, T-5, any non-chat tuned model (e.g. base Llama2)
+
+
+def get_attribute_or_key(tool_or_function, attribute, default=None):
+    if hasattr(tool_or_function, attribute):
+        return getattr(tool_or_function, attribute)
+    return tool_or_function.get(attribute, default)
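The new `get_attribute_or_key` helper is what lets the tool-calling hunks above accept both plain dicts (OpenAI-style payloads) and typed objects with attributes. A quick demonstration, with the helper repeated so the snippet runs standalone (`ToolFunction` is a hypothetical stand-in for an object-shaped tool, not a class from this diff):

```python
class ToolFunction:
    # Hypothetical stand-in for an object-shaped tool function (e.g. a pydantic model).
    def __init__(self, name: str):
        self.name = name

def get_attribute_or_key(tool_or_function, attribute, default=None):
    if hasattr(tool_or_function, attribute):
        return getattr(tool_or_function, attribute)
    return tool_or_function.get(attribute, default)

print(get_attribute_or_key({"name": "get_weather"}, "name"))      # dict path
print(get_attribute_or_key(ToolFunction("get_weather"), "name"))  # attribute path
print(get_attribute_or_key({}, "description", ""))                # missing key -> default
```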
@@ -236,17 +236,19 @@ def completion(
     if client is None:
         if vertex_credentials is not None and isinstance(vertex_credentials, str):
             import google.oauth2.service_account
 
-            json_obj = json.loads(vertex_credentials)
-
             creds = (
                 google.oauth2.service_account.Credentials.from_service_account_info(
-                    json_obj,
+                    json.loads(vertex_credentials),
                     scopes=["https://www.googleapis.com/auth/cloud-platform"],
                 )
             )
             ### CHECK IF ACCESS
             access_token = refresh_auth(credentials=creds)
+        else:
+            import google.auth
+            creds, _ = google.auth.default()
+            ### CHECK IF ACCESS
+            access_token = refresh_auth(credentials=creds)
 
         vertex_ai_client = AnthropicVertex(
             project_id=vertex_project,
@@ -610,6 +610,7 @@ def completion(
         "client",
         "rpm",
         "tpm",
+        "max_parallel_requests",
         "input_cost_per_token",
         "output_cost_per_token",
         "input_cost_per_second",
@@ -2598,6 +2599,7 @@ def embedding(
     client = kwargs.pop("client", None)
     rpm = kwargs.pop("rpm", None)
     tpm = kwargs.pop("tpm", None)
+    max_parallel_requests = kwargs.pop("max_parallel_requests", None)
     model_info = kwargs.get("model_info", None)
     metadata = kwargs.get("metadata", None)
     encoding_format = kwargs.get("encoding_format", None)
@@ -2655,6 +2657,7 @@ def embedding(
         "client",
         "rpm",
         "tpm",
+        "max_parallel_requests",
         "input_cost_per_token",
         "output_cost_per_token",
         "input_cost_per_second",
@@ -3514,6 +3517,7 @@ def image_generation(
         "client",
         "rpm",
         "tpm",
+        "max_parallel_requests",
         "input_cost_per_token",
         "output_cost_per_token",
         "hf_model_name",
File diff suppressed because one or more lines are too long (3 files)
@@ -1 +1 @@
-<!DOCTYPE html> [minified Next.js "LiteLLM Dashboard" Admin UI page, one generated line; buildId "Oe7aA-U7OV9Y13gspREJQ", module 16586, chunk static/chunks/app/page-93ac11fb17dce9d6.js]
+<!DOCTYPE html> [same generated page rebuilt; only buildId "Vjlnu8AomhCFg4fkGtcUs", module 65249, and chunk static/chunks/app/page-6ba29bc4256320f4.js differ]
@@ -1,7 +1,7 @@
 2:I[77831,[],""]
-3:I[16586,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-93ac11fb17dce9d6.js"],""]
+3:I[65249,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-6ba29bc4256320f4.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
-0:["Oe7aA-U7OV9Y13gspREJQ", [minified RSC payload, one generated line; identical on both sides except the leading buildId]]
+0:["Vjlnu8AomhCFg4fkGtcUs", [minified RSC payload, one generated line; identical on both sides except the leading buildId]]
 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null
@@ -4,14 +4,12 @@ model_list:
       model: openai/my-fake-model
       api_key: my-fake-key
      api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-      # api_base: http://0.0.0.0:8080
       stream_timeout: 0.001
   - model_name: fake-openai-endpoint
     litellm_params:
       model: openai/my-fake-model-2
       api_key: my-fake-key
       api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-      # api_base: http://0.0.0.0:8080
       stream_timeout: 0.001
   - litellm_params:
       model: azure/chatgpt-v-2
@@ -30,13 +28,6 @@ model_list:
   #     api_key: my-fake-key
   #     api_base: https://exampleopenaiendpoint-production.up.railway.app/
 
-# litellm_settings:
-#   success_callback: ["prometheus"]
-#   failure_callback: ["prometheus"]
-#   service_callback: ["prometheus_system"]
-#   upperbound_key_generate_params:
-#     max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET
-
 router_settings:
   routing_strategy: usage-based-routing-v2
   # redis_url: "os.environ/REDIS_URL"
@@ -48,6 +39,10 @@ router_settings:
 litellm_settings:
   num_retries: 3 # retry call 3 times on each model_name
   allowed_fails: 3 # cooldown model if it fails > 1 call in a minute.
+  success_callback: ["prometheus"]
+  failure_callback: ["prometheus"]
+  service_callback: ["prometheus_system"]
+
 
 general_settings:
   alerting: ["slack"]
@@ -87,6 +87,14 @@ class LiteLLMRoutes(enum.Enum):
         "/v2/key/info",
     ]
 
+    sso_only_routes: List = [
+        "/key/generate",
+        "/key/update",
+        "/key/delete",
+        "/global/spend/logs",
+        "/global/predict/spend/logs",
+    ]
+
     management_routes: List = [ # key
         "/key/generate",
         "/key/update",
@@ -1053,6 +1053,11 @@ async def user_api_key_auth(
                     status_code=status.HTTP_403_FORBIDDEN,
                     detail="key not allowed to access this team's info",
                 )
+            elif (
+                _has_user_setup_sso()
+                and route in LiteLLMRoutes.sso_only_routes.value
+            ):
+                pass
             else:
                 raise Exception(
                     f"Only master key can be used to generate, delete, update info for new keys/users/teams. Route={route}"
@@ -1102,6 +1107,13 @@ async def user_api_key_auth(
                 return UserAPIKeyAuth(
                     api_key=api_key, user_role="proxy_admin", **valid_token_dict
                 )
+            elif (
+                _has_user_setup_sso()
+                and route in LiteLLMRoutes.sso_only_routes.value
+            ):
+                return UserAPIKeyAuth(
+                    api_key=api_key, user_role="app_owner", **valid_token_dict
+                )
             else:
                 raise Exception(
                     f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"
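Both auth branches hinge on the same two-part gate: SSO must be configured and the requested route must be in the new `sso_only_routes` allow-list. A minimal sketch of that gate, assuming `_has_user_setup_sso` tests whether any SSO client ID is configured (the env-var names below are an assumption, not shown in this diff):

```python
import os

# Sketch of the SSO gate added in the two hunks above. Env-var names are an
# assumption about what _has_user_setup_sso() checks, not taken from this diff.
SSO_ONLY_ROUTES = [
    "/key/generate",
    "/key/update",
    "/key/delete",
    "/global/spend/logs",
    "/global/predict/spend/logs",
]

def _has_user_setup_sso() -> bool:
    return any(
        os.getenv(var) is not None
        for var in ("GOOGLE_CLIENT_ID", "MICROSOFT_CLIENT_ID", "GENERIC_CLIENT_ID")
    )

def ui_key_may_access(route: str) -> bool:
    # UI-issued keys pass only when SSO is configured AND the route is allow-listed.
    return _has_user_setup_sso() and route in SSO_ONLY_ROUTES
```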
@@ -5721,6 +5733,20 @@ async def new_user(data: NewUserRequest):
         "user" # only create a user, don't create key if 'auto_create_key' set to False
     )
     response = await generate_key_helper_fn(**data_json)
+
+    # Admin UI Logic
+    # if team_id passed add this user to the team
+    if data_json.get("team_id", None) is not None:
+        await team_member_add(
+            data=TeamMemberAddRequest(
+                team_id=data_json.get("team_id", None),
+                member=Member(
+                    user_id=data_json.get("user_id", None),
+                    role="user",
+                    user_email=data_json.get("user_email", None),
+                ),
+            )
+        )
     return NewUserResponse(
         key=response.get("token", ""),
         expires=response.get("expires", None),
@@ -6526,13 +6552,20 @@ async def team_member_add(
     existing_team_row = await prisma_client.get_data( # type: ignore
         team_id=data.team_id, table_name="team", query_type="find_unique"
     )
+    if existing_team_row is None:
+        raise HTTPException(
+            status_code=404,
+            detail={
+                "error": f"Team not found for team_id={getattr(data, 'team_id', None)}"
+            },
+        )
 
     new_member = data.member
 
     existing_team_row.members_with_roles.append(new_member)
 
     complete_team_data = LiteLLM_TeamTable(
-        **existing_team_row.model_dump(),
+        **_get_pydantic_json_dict(existing_team_row),
     )
 
     team_row = await prisma_client.update_data(
@@ -8120,7 +8153,6 @@ async def auth_callback(request: Request):
             }
             user_role = getattr(user_info, "user_role", None)
 
-        else:
             ## check if user-email in db ##
             user_info = await prisma_client.db.litellm_usertable.find_first(
                 where={"user_email": user_email}
@@ -8142,9 +8174,7 @@ async def auth_callback(request: Request):
                 litellm.default_user_params, dict
             ):
                 user_defined_values = {
-                    "models": litellm.default_user_params.get(
-                        "models", user_id_models
-                    ),
+                    "models": litellm.default_user_params.get("models", user_id_models),
                     "user_id": litellm.default_user_params.get("user_id", user_id),
                     "user_email": litellm.default_user_params.get(
                         "user_email", user_email
@@ -238,7 +238,10 @@ class ProxyLogging:
         litellm_params = kwargs.get("litellm_params", {})
         model = kwargs.get("model", "")
         api_base = litellm.get_api_base(model=model, optional_params=litellm_params)
-        messages = kwargs.get("messages", "")
+        messages = kwargs.get("messages", None)
+        # if messages does not exist fallback to "input"
+        if messages is None:
+            messages = kwargs.get("input", None)
 
         # only use first 100 chars for alerting
         _messages = str(messages)[:100]
@@ -282,7 +285,10 @@ class ProxyLogging:
         ):
             if request_data is not None:
                 model = request_data.get("model", "")
-                messages = request_data.get("messages", "")
+                messages = request_data.get("messages", None)
+                if messages is None:
+                    # if messages does not exist fallback to "input"
+                    messages = request_data.get("input", None)
                 trace_id = request_data.get("metadata", {}).get(
                     "trace_id", None
                 ) # get langfuse trace id
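Both alerting paths now fall back from `messages` to `input`, which matters for embedding requests that never carry a `messages` key. A minimal sketch of the effect (request payloads are made up):

```python
# Minimal sketch of the alerting fallback: embedding requests carry "input",
# not "messages", so alert previews no longer render as an empty string.
def preview(request_data: dict) -> str:
    messages = request_data.get("messages", None)
    if messages is None:
        messages = request_data.get("input", None)
    return str(messages)[:100]  # only use first 100 chars for alerting

print(preview({"messages": [{"role": "user", "content": "hi"}]}))  # chat request
print(preview({"input": ["embed this string"]}))                   # embedding request
```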
@@ -26,7 +26,12 @@ from litellm.llms.custom_httpx.azure_dall_e_2 import (
     CustomHTTPTransport,
     AsyncCustomHTTPTransport,
 )
-from litellm.utils import ModelResponse, CustomStreamWrapper, get_utc_datetime
+from litellm.utils import (
+    ModelResponse,
+    CustomStreamWrapper,
+    get_utc_datetime,
+    calculate_max_parallel_requests,
+)
 import copy
 from litellm._logging import verbose_router_logger
 import logging
@@ -61,6 +66,7 @@ class Router:
         num_retries: int = 0,
         timeout: Optional[float] = None,
         default_litellm_params={}, # default params for Router.chat.completion.create
+        default_max_parallel_requests: Optional[int] = None,
         set_verbose: bool = False,
         debug_level: Literal["DEBUG", "INFO"] = "INFO",
         fallbacks: List = [],
@@ -198,6 +204,7 @@ class Router:
         ) # use a dual cache (Redis+In-Memory) for tracking cooldowns, usage, etc.
 
         self.default_deployment = None # use this to track the users default deployment, when they want to use model = *
+        self.default_max_parallel_requests = default_max_parallel_requests
 
         if model_list:
             model_list = copy.deepcopy(model_list)
@@ -213,6 +220,7 @@ class Router:
         ) # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown
         self.num_retries = num_retries or litellm.num_retries or 0
         self.timeout = timeout or litellm.request_timeout
+
         self.retry_after = retry_after
         self.routing_strategy = routing_strategy
         self.fallbacks = fallbacks or litellm.fallbacks
@@ -298,7 +306,7 @@ class Router:
         else:
             litellm.failure_callback = [self.deployment_callback_on_failure]
         verbose_router_logger.info(
-            f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}"
+            f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter Redis Caching={self.cache.redis_cache}"
         )
         self.routing_strategy_args = routing_strategy_args
@@ -496,7 +504,9 @@ class Router:
             )
 
             rpm_semaphore = self._get_client(
-                deployment=deployment, kwargs=kwargs, client_type="rpm_client"
+                deployment=deployment,
+                kwargs=kwargs,
+                client_type="max_parallel_requests",
             )
 
             if rpm_semaphore is not None and isinstance(
@@ -681,7 +691,9 @@ class Router:
 
         ### CONCURRENCY-SAFE RPM CHECKS ###
         rpm_semaphore = self._get_client(
-            deployment=deployment, kwargs=kwargs, client_type="rpm_client"
+            deployment=deployment,
+            kwargs=kwargs,
+            client_type="max_parallel_requests",
         )
 
         if rpm_semaphore is not None and isinstance(
@@ -803,7 +815,9 @@ class Router:
 
         ### CONCURRENCY-SAFE RPM CHECKS ###
         rpm_semaphore = self._get_client(
-            deployment=deployment, kwargs=kwargs, client_type="rpm_client"
+            deployment=deployment,
+            kwargs=kwargs,
+            client_type="max_parallel_requests",
        )
 
         if rpm_semaphore is not None and isinstance(
@@ -1049,7 +1063,9 @@ class Router:
             )
 
             rpm_semaphore = self._get_client(
-                deployment=deployment, kwargs=kwargs, client_type="rpm_client"
+                deployment=deployment,
+                kwargs=kwargs,
+                client_type="max_parallel_requests",
             )
 
             if rpm_semaphore is not None and isinstance(
@@ -1243,7 +1259,9 @@ class Router:
 
         ### CONCURRENCY-SAFE RPM CHECKS ###
         rpm_semaphore = self._get_client(
-            deployment=deployment, kwargs=kwargs, client_type="rpm_client"
+            deployment=deployment,
+            kwargs=kwargs,
+            client_type="max_parallel_requests",
         )
 
         if rpm_semaphore is not None and isinstance(
@@ -1862,17 +1880,23 @@ class Router:
             model_id = model["model_info"]["id"]
             # ### IF RPM SET - initialize a semaphore ###
             rpm = litellm_params.get("rpm", None)
-            if rpm:
-                semaphore = asyncio.Semaphore(rpm)
-                cache_key = f"{model_id}_rpm_client"
+            tpm = litellm_params.get("tpm", None)
+            max_parallel_requests = litellm_params.get("max_parallel_requests", None)
+            calculated_max_parallel_requests = calculate_max_parallel_requests(
+                rpm=rpm,
+                max_parallel_requests=max_parallel_requests,
+                tpm=tpm,
+                default_max_parallel_requests=self.default_max_parallel_requests,
+            )
+            if calculated_max_parallel_requests:
+                semaphore = asyncio.Semaphore(calculated_max_parallel_requests)
+                cache_key = f"{model_id}_max_parallel_requests_client"
                 self.cache.set_cache(
                     key=cache_key,
                     value=semaphore,
                     local_only=True,
                 )
 
-                # print("STORES SEMAPHORE IN CACHE")
-
             #### for OpenAI / Azure we need to initalize the Client for High Traffic ########
             custom_llm_provider = litellm_params.get("custom_llm_provider")
             custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""
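The semaphore size is no longer just `rpm`: it is derived from whichever limit the deployment actually declares. A plausible sketch of the precedence `calculate_max_parallel_requests` implements, in the argument order used above (the tpm-to-concurrency heuristic is an assumption, not spelled out in this diff):

```python
from typing import Optional

# Hedged sketch of calculate_max_parallel_requests: an explicit
# max_parallel_requests wins, then rpm, then a tpm-derived estimate,
# then the router-wide default. The tpm heuristic below is an assumption.
def calculate_max_parallel_requests(
    max_parallel_requests: Optional[int],
    rpm: Optional[int],
    tpm: Optional[int],
    default_max_parallel_requests: Optional[int],
) -> Optional[int]:
    if max_parallel_requests is not None:
        return max_parallel_requests
    if rpm is not None:
        return rpm
    if tpm is not None:
        return int(tpm / 1000 / 6)  # rough tokens-per-request guess (assumption)
    return default_max_parallel_requests

print(calculate_max_parallel_requests(None, 100, None, None))   # 100 (from rpm)
print(calculate_max_parallel_requests(None, None, 60000, None)) # 10 (from tpm)
```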
@@ -2537,8 +2561,8 @@ class Router:
             The appropriate client based on the given client_type and kwargs.
         """
         model_id = deployment["model_info"]["id"]
-        if client_type == "rpm_client":
-            cache_key = "{}_rpm_client".format(model_id)
+        if client_type == "max_parallel_requests":
+            cache_key = "{}_max_parallel_requests_client".format(model_id)
             client = self.cache.get_cache(key=cache_key, local_only=True)
             return client
         elif client_type == "async":
@@ -2778,6 +2802,7 @@ class Router:
         """
         if (
             self.routing_strategy != "usage-based-routing-v2"
+            and self.routing_strategy != "simple-shuffle"
         ): # prevent regressions for other routing strategies, that don't have async get available deployments implemented.
             return self.get_available_deployment(
                 model=model,
@@ -2828,6 +2853,25 @@ class Router:
                 messages=messages,
                 input=input,
             )
+        elif self.routing_strategy == "simple-shuffle":
+            # if users pass rpm or tpm, we do a random weighted pick - based on rpm/tpm
+            ############## Check if we can do a RPM/TPM based weighted pick #################
+            rpm = healthy_deployments[0].get("litellm_params").get("rpm", None)
+            if rpm is not None:
+                # use weight-random pick if rpms provided
+                rpms = [m["litellm_params"].get("rpm", 0) for m in healthy_deployments]
+                verbose_router_logger.debug(f"\nrpms {rpms}")
+                total_rpm = sum(rpms)
+                weights = [rpm / total_rpm for rpm in rpms]
+                verbose_router_logger.debug(f"\n weights {weights}")
+                # Perform weighted random pick
+                selected_index = random.choices(range(len(rpms)), weights=weights)[0]
+                verbose_router_logger.debug(f"\n selected index, {selected_index}")
+                deployment = healthy_deployments[selected_index]
+                verbose_router_logger.info(
+                    f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}"
+                )
+                return deployment or deployment[0]
 
         if deployment is None:
             verbose_router_logger.info(
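With this hunk, simple-shuffle gains an async path that does the same rpm-weighted pick as the sync version. A self-contained sketch of the weighting, with made-up deployments:

```python
import random

# Standalone demo of the rpm-weighted pick in the new simple-shuffle async branch.
# The deployment list and rpm values below are made up for illustration.
healthy_deployments = [
    {"model_name": "gpt-3.5-turbo", "litellm_params": {"rpm": 900}},
    {"model_name": "gpt-3.5-turbo", "litellm_params": {"rpm": 100}},
]

rpms = [m["litellm_params"].get("rpm", 0) for m in healthy_deployments]
total_rpm = sum(rpms)
weights = [rpm / total_rpm for rpm in rpms]  # [0.9, 0.1]

# random.choices performs the weighted draw; deployment 0 wins ~90% of the time.
selected_index = random.choices(range(len(rpms)), weights=weights)[0]
print(healthy_deployments[selected_index])
```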
@@ -407,13 +407,15 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
             tpm_keys.append(tpm_key)
             rpm_keys.append(rpm_key)
 
-        tpm_values = await self.router_cache.async_batch_get_cache(
-            keys=tpm_keys
-        ) # [1, 2, None, ..]
-        rpm_values = await self.router_cache.async_batch_get_cache(
-            keys=rpm_keys
-        ) # [1, 2, None, ..]
+        combined_tpm_rpm_keys = tpm_keys + rpm_keys
+
+        combined_tpm_rpm_values = await self.router_cache.async_batch_get_cache(
+            keys=combined_tpm_rpm_keys
+        ) # [1, 2, None, ..]
+
+        tpm_values = combined_tpm_rpm_values[: len(tpm_keys)]
+        rpm_values = combined_tpm_rpm_values[len(tpm_keys) :]
 
         return self._common_checks_available_deployment(
             model_group=model_group,
             healthy_deployments=healthy_deployments,
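Fetching tpm and rpm keys in one batched cache call halves the round-trips; the combined result is then split positionally. A tiny sketch of the split (key names and values are placeholders, not the handler's real key format):

```python
# Illustrative split of one combined batch-get result. Key names and the
# returned values are placeholders, not the handler's actual cache format.
tpm_keys = ["gpt-4:tpm:10-30", "gpt-4:tpm:10-31"]
rpm_keys = ["gpt-4:rpm:10-30", "gpt-4:rpm:10-31"]

combined_tpm_rpm_values = [1200, None, 4, 7]  # what a batch get might return

tpm_values = combined_tpm_rpm_values[: len(tpm_keys)]  # [1200, None]
rpm_values = combined_tpm_rpm_values[len(tpm_keys):]   # [4, 7]
```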
@@ -269,6 +269,30 @@ def test_bedrock_claude_3_tool_calling():
         assert isinstance(
             response.choices[0].message.tool_calls[0].function.arguments, str
         )
+        messages.append(
+            response.choices[0].message.model_dump()
+        ) # Add assistant tool invokes
+        tool_result = (
+            '{"location": "Boston", "temperature": "72", "unit": "fahrenheit"}'
+        )
+        # Add user submitted tool results in the OpenAI format
+        messages.append(
+            {
+                "tool_call_id": response.choices[0].message.tool_calls[0].id,
+                "role": "tool",
+                "name": response.choices[0].message.tool_calls[0].function.name,
+                "content": tool_result,
+            }
+        )
+        # In the second response, Claude should deduce answer from tool results
+        second_response = completion(
+            model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+            messages=messages,
+            tools=tools,
+            tool_choice="auto",
+        )
+        print(f"second response: {second_response}")
+        assert isinstance(second_response.choices[0].message.content, str)
     except RateLimitError:
         pass
     except Exception as e:
@@ -120,6 +120,15 @@ async def test_new_user_response(prisma_client):
         await litellm.proxy.proxy_server.prisma_client.connect()
         from litellm.proxy.proxy_server import user_api_key_cache
 
+        await new_team(
+            NewTeamRequest(
+                team_id="ishaan-special-team",
+            ),
+            user_api_key_dict=UserAPIKeyAuth(
+                user_role="proxy_admin", api_key="sk-1234", user_id="1234"
+            ),
+        )
+
         _response = await new_user(
             data=NewUserRequest(
                 models=["azure-gpt-3.5"],
@@ -999,10 +1008,32 @@ def test_generate_and_update_key(prisma_client):
 
     async def test():
         await litellm.proxy.proxy_server.prisma_client.connect()
+
+        # create team "litellm-core-infra@gmail.com""
+        print("creating team litellm-core-infra@gmail.com")
+        await new_team(
+            NewTeamRequest(
+                team_id="litellm-core-infra@gmail.com",
+            ),
+            user_api_key_dict=UserAPIKeyAuth(
+                user_role="proxy_admin", api_key="sk-1234", user_id="1234"
+            ),
+        )
+
+        await new_team(
+            NewTeamRequest(
+                team_id="ishaan-special-team",
+            ),
+            user_api_key_dict=UserAPIKeyAuth(
+                user_role="proxy_admin", api_key="sk-1234", user_id="1234"
+            ),
+        )
+
         request = NewUserRequest(
-            metadata={"team": "litellm-team3", "project": "litellm-project3"},
+            metadata={"project": "litellm-project3"},
             team_id="litellm-core-infra@gmail.com",
         )
 
         key = await new_user(request)
         print(key)
@@ -1015,7 +1046,6 @@ def test_generate_and_update_key(prisma_client):
         print("\n info for key=", result["info"])
         assert result["info"]["max_parallel_requests"] == None
         assert result["info"]["metadata"] == {
-            "team": "litellm-team3",
             "project": "litellm-project3",
         }
         assert result["info"]["team_id"] == "litellm-core-infra@gmail.com"
@@ -1037,7 +1067,7 @@ def test_generate_and_update_key(prisma_client):
        # update the team id
        response2 = await update_key_fn(
            request=Request,
-            data=UpdateKeyRequest(key=generated_key, team_id="ishaan"),
+            data=UpdateKeyRequest(key=generated_key, team_id="ishaan-special-team"),
        )
        print("response2=", response2)
@@ -1048,11 +1078,10 @@ def test_generate_and_update_key(prisma_client):
        print("\n info for key=", result["info"])
        assert result["info"]["max_parallel_requests"] == None
        assert result["info"]["metadata"] == {
-            "team": "litellm-team3",
            "project": "litellm-project3",
        }
        assert result["info"]["models"] == ["ada", "babbage", "curie", "davinci"]
-        assert result["info"]["team_id"] == "ishaan"
+        assert result["info"]["team_id"] == "ishaan-special-team"

        # cleanup - delete key
        delete_key_request = KeyRequest(keys=[generated_key])
@@ -1941,6 +1970,15 @@ async def test_master_key_hashing(prisma_client):
    await litellm.proxy.proxy_server.prisma_client.connect()
    from litellm.proxy.proxy_server import user_api_key_cache

+    await new_team(
+        NewTeamRequest(
+            team_id="ishaans-special-team",
+        ),
+        user_api_key_dict=UserAPIKeyAuth(
+            user_role="proxy_admin", api_key="sk-1234", user_id="1234"
+        ),
+    )
+
    _response = await new_user(
        data=NewUserRequest(
            models=["azure-gpt-3.5"],
@@ -81,7 +81,7 @@ def test_async_fallbacks(caplog):
    # Define the expected log messages
    # - error request, falling back notice, success notice
    expected_logs = [
-        "Intialized router with Routing strategy: simple-shuffle\n\nRouting fallbacks: [{'gpt-3.5-turbo': ['azure/gpt-3.5-turbo']}]\n\nRouting context window fallbacks: None",
+        "Intialized router with Routing strategy: simple-shuffle\n\nRouting fallbacks: [{'gpt-3.5-turbo': ['azure/gpt-3.5-turbo']}]\n\nRouting context window fallbacks: None\n\nRouter Redis Caching=None",
        "litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m",
        "Falling back to model_group = azure/gpt-3.5-turbo",
        "litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",
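For readers tracing the fallback sequence asserted above: the expected logs correspond to a router configured roughly as follows. This is a minimal sketch, not code from this commit; the bad key, Azure env vars, and deployment names are illustrative assumptions.

```python
import asyncio
import os

import litellm


async def main():
    # Primary group uses a deliberately bad key; the azure group is the fallback.
    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "bad-key"},
            },
            {
                "model_name": "azure/gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
            },
        ],
        fallbacks=[{"gpt-3.5-turbo": ["azure/gpt-3.5-turbo"]}],
    )
    # The 401 on gpt-3.5-turbo should produce the
    # "Falling back to model_group = azure/gpt-3.5-turbo" log line.
    response = await router.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
    )
    print(response)


asyncio.run(main())
```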
litellm/tests/test_router_max_parallel_requests.py (new file, 115 lines)
@@ -0,0 +1,115 @@
+# What is this?
+## Unit tests for the max_parallel_requests feature on Router
+import sys, os, time, inspect, asyncio, traceback
+from datetime import datetime
+import pytest
+
+sys.path.insert(0, os.path.abspath("../.."))
+import litellm
+from litellm.utils import calculate_max_parallel_requests
+from typing import Optional
+
+"""
+- only rpm
+- only tpm
+- only max_parallel_requests
+- max_parallel_requests + rpm
+- max_parallel_requests + tpm
+- max_parallel_requests + tpm + rpm
+"""
+
+
+max_parallel_requests_values = [None, 10]
+tpm_values = [None, 20, 300000]
+rpm_values = [None, 30]
+default_max_parallel_requests = [None, 40]
+
+
+@pytest.mark.parametrize(
+    "max_parallel_requests, tpm, rpm, default_max_parallel_requests",
+    [
+        (mp, tp, rp, dmp)
+        for mp in max_parallel_requests_values
+        for tp in tpm_values
+        for rp in rpm_values
+        for dmp in default_max_parallel_requests
+    ],
+)
+def test_scenario(max_parallel_requests, tpm, rpm, default_max_parallel_requests):
+    calculated_max_parallel_requests = calculate_max_parallel_requests(
+        max_parallel_requests=max_parallel_requests,
+        rpm=rpm,
+        tpm=tpm,
+        default_max_parallel_requests=default_max_parallel_requests,
+    )
+    if max_parallel_requests is not None:
+        assert max_parallel_requests == calculated_max_parallel_requests
+    elif rpm is not None:
+        assert rpm == calculated_max_parallel_requests
+    elif tpm is not None:
+        calculated_rpm = int(tpm / 1000 / 6)
+        if calculated_rpm == 0:
+            calculated_rpm = 1
+        print(
+            f"test calculated_rpm: {calculated_rpm}, calculated_max_parallel_requests={calculated_max_parallel_requests}"
+        )
+        assert calculated_rpm == calculated_max_parallel_requests
+    elif default_max_parallel_requests is not None:
+        assert calculated_max_parallel_requests == default_max_parallel_requests
+    else:
+        assert calculated_max_parallel_requests is None
+
+
+@pytest.mark.parametrize(
+    "max_parallel_requests, tpm, rpm, default_max_parallel_requests",
+    [
+        (mp, tp, rp, dmp)
+        for mp in max_parallel_requests_values
+        for tp in tpm_values
+        for rp in rpm_values
+        for dmp in default_max_parallel_requests
+    ],
+)
+def test_setting_mpr_limits_per_model(
+    max_parallel_requests, tpm, rpm, default_max_parallel_requests
+):
+    deployment = {
+        "model_name": "gpt-3.5-turbo",
+        "litellm_params": {
+            "model": "gpt-3.5-turbo",
+            "max_parallel_requests": max_parallel_requests,
+            "tpm": tpm,
+            "rpm": rpm,
+        },
+        "model_info": {"id": "my-unique-id"},
+    }
+
+    router = litellm.Router(
+        model_list=[deployment],
+        default_max_parallel_requests=default_max_parallel_requests,
+    )
+
+    mpr_client: Optional[asyncio.Semaphore] = router._get_client(
+        deployment=deployment,
+        kwargs={},
+        client_type="max_parallel_requests",
+    )
+
+    if max_parallel_requests is not None:
+        assert max_parallel_requests == mpr_client._value
+    elif rpm is not None:
+        assert rpm == mpr_client._value
+    elif tpm is not None:
+        calculated_rpm = int(tpm / 1000 / 6)
+        if calculated_rpm == 0:
+            calculated_rpm = 1
+        print(
+            f"test calculated_rpm: {calculated_rpm}, calculated_max_parallel_requests={mpr_client._value}"
+        )
+        assert calculated_rpm == mpr_client._value
+    elif default_max_parallel_requests is not None:
+        assert mpr_client._value == default_max_parallel_requests
+    else:
+        assert mpr_client is None
+
+    # raise Exception("it worked!")
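The `mpr_client` checked above is a plain `asyncio.Semaphore`. As a minimal sketch of what such a semaphore buys the router — assuming a limit of 3 and a stand-in coroutine in place of a real deployment call:

```python
import asyncio

# e.g. the value calculate_max_parallel_requests returned for a deployment
semaphore = asyncio.Semaphore(3)


async def call_llm(i: int) -> str:
    # placeholder for an actual deployment call
    async with semaphore:  # at most 3 coroutines pass this point concurrently
        await asyncio.sleep(0.1)
        return f"response {i}"


async def main():
    # 10 tasks are created, but only 3 run the guarded section at a time
    results = await asyncio.gather(*(call_llm(i) for i in range(10)))
    print(len(results))


asyncio.run(main())
```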
@@ -5434,6 +5434,49 @@ def get_optional_params(
     return optional_params


+def calculate_max_parallel_requests(
+    max_parallel_requests: Optional[int],
+    rpm: Optional[int],
+    tpm: Optional[int],
+    default_max_parallel_requests: Optional[int],
+) -> Optional[int]:
+    """
+    Returns the max parallel requests to send to a deployment.
+
+    Used in semaphore for async requests on router.
+
+    Parameters:
+    - max_parallel_requests - Optional[int] - max_parallel_requests allowed for that deployment
+    - rpm - Optional[int] - requests per minute allowed for that deployment
+    - tpm - Optional[int] - tokens per minute allowed for that deployment
+    - default_max_parallel_requests - Optional[int] - default_max_parallel_requests allowed for any deployment
+
+    Returns:
+    - int or None (if all params are None)
+
+    Order:
+    max_parallel_requests > rpm > tpm / 6 (azure formula) > default max_parallel_requests
+
+    Azure RPM formula:
+    6 rpm per 1000 TPM
+    https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits
+    """
+    if max_parallel_requests is not None:
+        return max_parallel_requests
+    elif rpm is not None:
+        return rpm
+    elif tpm is not None:
+        calculated_rpm = int(tpm / 1000 / 6)
+        if calculated_rpm == 0:
+            calculated_rpm = 1
+        return calculated_rpm
+    elif default_max_parallel_requests is not None:
+        return default_max_parallel_requests
+    return None
+
+
 def get_api_base(model: str, optional_params: dict) -> Optional[str]:
     """
     Returns the api base used for calling the model.
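To make the precedence concrete, a few worked calls (input values chosen arbitrarily; the results follow directly from the function above):

```python
from litellm.utils import calculate_max_parallel_requests

# an explicit per-deployment limit wins over everything else
assert calculate_max_parallel_requests(10, rpm=30, tpm=300000, default_max_parallel_requests=40) == 10
# otherwise rpm is used directly
assert calculate_max_parallel_requests(None, rpm=30, tpm=300000, default_max_parallel_requests=40) == 30
# otherwise the azure formula: int(300000 / 1000 / 6) == 50
assert calculate_max_parallel_requests(None, rpm=None, tpm=300000, default_max_parallel_requests=40) == 50
# otherwise the router-level default
assert calculate_max_parallel_requests(None, rpm=None, tpm=None, default_max_parallel_requests=40) == 40
# tiny tpm values floor to 1: int(20 / 1000 / 6) == 0 -> 1
assert calculate_max_parallel_requests(None, rpm=None, tpm=20, default_max_parallel_requests=None) == 1
```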
@@ -96,9 +96,9 @@ litellm_settings:

router_settings:
  routing_strategy: usage-based-routing-v2
-  redis_host: os.environ/REDIS_HOST
-  redis_password: os.environ/REDIS_PASSWORD
-  redis_port: os.environ/REDIS_PORT
+  # redis_host: os.environ/REDIS_HOST
+  # redis_password: os.environ/REDIS_PASSWORD
+  # redis_port: os.environ/REDIS_PORT
  enable_pre_call_checks: true

general_settings:
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.35.17"
+version = "1.35.18"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"

@@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.35.17"
+version = "1.35.18"
 version_files = [
     "pyproject.toml:^version"
 ]
@@ -14,6 +14,24 @@ sys.path.insert(
 import litellm


+async def generate_team(session):
+    url = "http://0.0.0.0:4000/team/new"
+    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
+    data = {
+        "team_id": "litellm-dashboard",
+    }
+
+    async with session.post(url, headers=headers, json=data) as response:
+        status = response.status
+        response_text = await response.text()
+
+        print(f"Response (Status code: {status}):")
+        print(response_text)
+        print()
+        _json_response = await response.json()
+        return _json_response
+
+
 async def generate_user(
     session,
     user_role="app_owner",
@@ -668,7 +686,7 @@ async def test_key_rate_limit():


 @pytest.mark.asyncio
-async def test_key_delete():
+async def test_key_delete_ui():
     """
     Admin UI flow - DO NOT DELETE
     -> Create a key with user_id = "ishaan"
@@ -680,6 +698,8 @@ async def test_key_delete():
         key = key_gen["key"]

         # generate a admin UI key
+        team = await generate_team(session=session)
+        print("generated team: ", team)
         admin_ui_key = await generate_user(session=session, user_role="proxy_admin")
         print(
             "trying to delete key=",
@@ -260,7 +260,10 @@ async def test_chat_completion_ratelimit():
         await asyncio.gather(*tasks)
         pytest.fail("Expected at least 1 call to fail")
     except Exception as e:
+        if "Request did not return a 200 status code: 429" in str(e):
             pass
+        else:
+            pytest.fail(f"Wrong error received - {str(e)}")


 @pytest.mark.asyncio
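The 429 branch above can be exercised against a running proxy with a rate-limited key. A minimal sketch — the proxy URL, the model name, and a key generated with `rpm_limit=1` are all assumptions for illustration, not values from this diff:

```python
import asyncio

import openai


async def one_call(client: openai.AsyncOpenAI) -> None:
    await client.chat.completions.create(
        model="fake-openai-endpoint",
        messages=[{"role": "user", "content": "hi"}],
    )


async def main():
    # key assumed to have been generated with rpm_limit=1,
    # so parallel calls should trip the proxy's limiter
    client = openai.AsyncOpenAI(api_key="sk-<low-rpm-key>", base_url="http://0.0.0.0:4000")
    try:
        await asyncio.gather(*(one_call(client) for _ in range(3)))
    except openai.RateLimitError as e:
        print("got expected 429:", e)


asyncio.run(main())
```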
File diff suppressed because one or more lines are too long

@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[16586,[\"968\",\"static/chunks/968-0cc23fee51b47e4e.js\",\"931\",\"static/chunks/app/page-93ac11fb17dce9d6.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"Oe7aA-U7OV9Y13gspREJQ\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[65249,[\"968\",\"static/chunks/968-0cc23fee51b47e4e.js\",\"931\",\"static/chunks/app/page-6ba29bc4256320f4.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"Vjlnu8AomhCFg4fkGtcUs\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[16586,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-93ac11fb17dce9d6.js"],""]
3:I[65249,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-6ba29bc4256320f4.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["Oe7aA-U7OV9Y13gspREJQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["Vjlnu8AomhCFg4fkGtcUs",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@@ -9,6 +9,7 @@ import Teams from "@/components/teams";
 import AdminPanel from "@/components/admins";
 import Settings from "@/components/settings";
 import GeneralSettings from "@/components/general_settings";
+import APIRef from "@/components/api_ref";
 import ChatUI from "@/components/chat_ui";
 import Sidebar from "../components/leftnav";
 import Usage from "../components/usage";

@@ -165,6 +166,8 @@ const CreateKeyPage = () => {
             accessToken={accessToken}
             showSSOBanner={showSSOBanner}
           />
+        ) : page == "api_ref" ? (
+          <APIRef/>
         ) : page == "settings" ? (
           <Settings
             userID={userID}

ui/litellm-dashboard/src/components/api_ref.tsx (new file, 152 lines)
@@ -0,0 +1,152 @@
"use client";
|
||||||
|
import React, { useEffect, useState } from "react";
|
||||||
|
import {
|
||||||
|
Badge,
|
||||||
|
Card,
|
||||||
|
Table,
|
||||||
|
Metric,
|
||||||
|
TableBody,
|
||||||
|
TableCell,
|
||||||
|
TableHead,
|
||||||
|
TableHeaderCell,
|
||||||
|
TableRow,
|
||||||
|
Text,
|
||||||
|
Title,
|
||||||
|
Icon,
|
||||||
|
Accordion,
|
||||||
|
AccordionBody,
|
||||||
|
AccordionHeader,
|
||||||
|
List,
|
||||||
|
ListItem,
|
||||||
|
Tab,
|
||||||
|
TabGroup,
|
||||||
|
TabList,
|
||||||
|
TabPanel,
|
||||||
|
TabPanels,
|
||||||
|
Grid,
|
||||||
|
} from "@tremor/react";
|
||||||
|
import { Statistic } from "antd"
|
||||||
|
import { modelAvailableCall } from "./networking";
|
||||||
|
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
|
||||||
|
|
||||||
|
|
||||||
|
const APIRef = ({}) => {
|
||||||
|
return (
|
||||||
|
<>
|
||||||
|
<Grid className="gap-2 p-8 h-[80vh] w-full mt-2">
|
||||||
|
<div className="mb-5">
|
||||||
|
<p className="text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">OpenAI Compatible Proxy: API Reference</p>
|
||||||
|
<Text className="mt-2 mb-2">LiteLLM is OpenAI Compatible. This means your API Key works with the OpenAI SDK. Just replace the base_url to point to your litellm proxy. Example Below </Text>
|
||||||
|
|
||||||
|
<TabGroup>
|
||||||
|
<TabList>
|
||||||
|
<Tab>OpenAI Python SDK</Tab>
|
||||||
|
<Tab>LlamaIndex</Tab>
|
||||||
|
<Tab>Langchain Py</Tab>
|
||||||
|
</TabList>
|
||||||
|
<TabPanels>
|
||||||
|
<TabPanel>
|
||||||
|
<SyntaxHighlighter language="python">
|
||||||
|
{`
|
||||||
|
import openai
|
||||||
|
client = openai.OpenAI(
|
||||||
|
api_key="your_api_key",
|
||||||
|
base_url="http://0.0.0.0:4000" # LiteLLM Proxy is OpenAI compatible, Read More: https://docs.litellm.ai/docs/proxy/user_keys
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="gpt-3.5-turbo", # model to send to the proxy
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "this is a test request, write a short poem"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
`}
|
||||||
|
</SyntaxHighlighter>
|
||||||
|
</TabPanel>
|
||||||
|
<TabPanel>
|
||||||
|
<SyntaxHighlighter language="python">
|
||||||
|
{`
|
||||||
|
import os, dotenv
|
||||||
|
|
||||||
|
from llama_index.llms import AzureOpenAI
|
||||||
|
from llama_index.embeddings import AzureOpenAIEmbedding
|
||||||
|
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
|
||||||
|
|
||||||
|
llm = AzureOpenAI(
|
||||||
|
engine="azure-gpt-3.5", # model_name on litellm proxy
|
||||||
|
temperature=0.0,
|
||||||
|
azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint
|
||||||
|
api_key="sk-1234", # litellm proxy API Key
|
||||||
|
api_version="2023-07-01-preview",
|
||||||
|
)
|
||||||
|
|
||||||
|
embed_model = AzureOpenAIEmbedding(
|
||||||
|
deployment_name="azure-embedding-model",
|
||||||
|
azure_endpoint="http://0.0.0.0:4000",
|
||||||
|
api_key="sk-1234",
|
||||||
|
api_version="2023-07-01-preview",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
documents = SimpleDirectoryReader("llama_index_data").load_data()
|
||||||
|
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
|
||||||
|
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
|
||||||
|
|
||||||
|
query_engine = index.as_query_engine()
|
||||||
|
response = query_engine.query("What did the author do growing up?")
|
||||||
|
print(response)
|
||||||
|
|
||||||
|
`}
|
||||||
|
</SyntaxHighlighter>
|
||||||
|
</TabPanel>
|
||||||
|
<TabPanel>
|
||||||
|
<SyntaxHighlighter language="python">
|
||||||
|
{`
|
||||||
|
from langchain.chat_models import ChatOpenAI
|
||||||
|
from langchain.prompts.chat import (
|
||||||
|
ChatPromptTemplate,
|
||||||
|
HumanMessagePromptTemplate,
|
||||||
|
SystemMessagePromptTemplate,
|
||||||
|
)
|
||||||
|
from langchain.schema import HumanMessage, SystemMessage
|
||||||
|
|
||||||
|
chat = ChatOpenAI(
|
||||||
|
openai_api_base="http://0.0.0.0:4000",
|
||||||
|
model = "gpt-3.5-turbo",
|
||||||
|
temperature=0.1
|
||||||
|
)
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
SystemMessage(
|
||||||
|
content="You are a helpful assistant that im using to make a test request to."
|
||||||
|
),
|
||||||
|
HumanMessage(
|
||||||
|
content="test from litellm. tell me why it's amazing in 1 sentence"
|
||||||
|
),
|
||||||
|
]
|
||||||
|
response = chat(messages)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
|
||||||
|
`}
|
||||||
|
</SyntaxHighlighter>
|
||||||
|
</TabPanel>
|
||||||
|
</TabPanels>
|
||||||
|
</TabGroup>
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</Grid>
|
||||||
|
|
||||||
|
|
||||||
|
</>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
export default APIRef;
|
||||||
|
|
|
@@ -13,12 +13,12 @@ import {
   TabGroup,
   TabList,
   TabPanel,
+  TabPanels,
   Metric,
   Col,
   Text,
   SelectItem,
   TextInput,
-  TabPanels,
   Button,
 } from "@tremor/react";
@@ -201,7 +201,6 @@ const ChatUI: React.FC<ChatUIProps> = ({
           <TabGroup>
             <TabList>
               <Tab>Chat</Tab>
-              <Tab>API Reference</Tab>
             </TabList>

             <TabPanels>
@@ -272,124 +271,7 @@ const ChatUI: React.FC<ChatUIProps> = ({
             </div>
           </div>
         </TabPanel>
-        <TabPanel>
-          <TabGroup>
-            <TabList>
-              <Tab>OpenAI Python SDK</Tab>
-              <Tab>LlamaIndex</Tab>
-              <Tab>Langchain Py</Tab>
-            </TabList>
-            <TabPanels>
-              <TabPanel>
-                <SyntaxHighlighter language="python">
-                  {`
-import openai
-client = openai.OpenAI(
-    api_key="your_api_key",
-    base_url="http://0.0.0.0:4000" # proxy base url
-)
-
-response = client.chat.completions.create(
-    model="gpt-3.5-turbo", # model to use from Models Tab
-    messages = [
-        {
-            "role": "user",
-            "content": "this is a test request, write a short poem"
-        }
-    ],
-    extra_body={
-        "metadata": {
-            "generation_name": "ishaan-generation-openai-client",
-            "generation_id": "openai-client-gen-id22",
-            "trace_id": "openai-client-trace-id22",
-            "trace_user_id": "openai-client-user-id2"
-        }
-    }
-)
-
-print(response)
-`}
-                </SyntaxHighlighter>
-              </TabPanel>
-              <TabPanel>
-                <SyntaxHighlighter language="python">
-                  {`
-import os, dotenv
-
-from llama_index.llms import AzureOpenAI
-from llama_index.embeddings import AzureOpenAIEmbedding
-from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
-
-llm = AzureOpenAI(
-    engine="azure-gpt-3.5", # model_name on litellm proxy
-    temperature=0.0,
-    azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint
-    api_key="sk-1234", # litellm proxy API Key
-    api_version="2023-07-01-preview",
-)
-
-embed_model = AzureOpenAIEmbedding(
-    deployment_name="azure-embedding-model",
-    azure_endpoint="http://0.0.0.0:4000",
-    api_key="sk-1234",
-    api_version="2023-07-01-preview",
-)
-
-
-documents = SimpleDirectoryReader("llama_index_data").load_data()
-service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
-index = VectorStoreIndex.from_documents(documents, service_context=service_context)
-
-query_engine = index.as_query_engine()
-response = query_engine.query("What did the author do growing up?")
-print(response)
-
-`}
-                </SyntaxHighlighter>
-              </TabPanel>
-              <TabPanel>
-                <SyntaxHighlighter language="python">
-                  {`
-from langchain.chat_models import ChatOpenAI
-from langchain.prompts.chat import (
-    ChatPromptTemplate,
-    HumanMessagePromptTemplate,
-    SystemMessagePromptTemplate,
-)
-from langchain.schema import HumanMessage, SystemMessage
-
-chat = ChatOpenAI(
-    openai_api_base="http://0.0.0.0:8000",
-    model = "gpt-3.5-turbo",
-    temperature=0.1,
-    extra_body={
-        "metadata": {
-            "generation_name": "ishaan-generation-langchain-client",
-            "generation_id": "langchain-client-gen-id22",
-            "trace_id": "langchain-client-trace-id22",
-            "trace_user_id": "langchain-client-user-id2"
-        }
-    }
-)
-
-messages = [
-    SystemMessage(
-        content="You are a helpful assistant that im using to make a test request to."
-    ),
-    HumanMessage(
-        content="test from litellm. tell me why it's amazing in 1 sentence"
-    ),
-]
-response = chat(messages)
-
-print(response)
-
-`}
-                </SyntaxHighlighter>
-              </TabPanel>
-            </TabPanels>
-          </TabGroup>
-        </TabPanel>
       </TabPanels>
     </TabGroup>
   </Card>
@@ -2,7 +2,7 @@

 import React, { useState, useEffect, useRef } from "react";
 import { Button, TextInput, Grid, Col } from "@tremor/react";
-import { Card, Metric, Text, Title, Subtitle } from "@tremor/react";
+import { Card, Metric, Text, Title, Subtitle, Accordion, AccordionHeader, AccordionBody, } from "@tremor/react";
 import { CopyToClipboard } from 'react-copy-to-clipboard';
 import {
   Button as Button2,
@@ -147,6 +147,17 @@ const CreateKey: React.FC<CreateKeyProps> = ({
                 mode="multiple"
                 placeholder="Select models"
                 style={{ width: "100%" }}
+                onChange={(values) => {
+                  // Check if "All Team Models" is selected
+                  const isAllTeamModelsSelected = values.includes("all-team-models");
+
+                  // If "All Team Models" is selected, deselect all other models
+                  if (isAllTeamModelsSelected) {
+                    const newValues = ["all-team-models"];
+                    // You can call the form's setFieldsValue method to update the value
+                    form.setFieldsValue({ models: newValues });
+                  }
+                }}
               >
                 <Option key="all-team-models" value="all-team-models">
                   All Team Models
@@ -248,16 +259,153 @@ const CreateKey: React.FC<CreateKeyProps> = ({
             </>
           ) : (
             <>
-              <Form.Item label="Key Name" name="key_alias">
+              <Form.Item
+                label="Key Name"
+                name="key_alias"
+                rules={[{ required: true, message: 'Please input a key name' }]}
+                help="required"
+              >
                 <Input />
               </Form.Item>
-              <Form.Item label="Team ID (Contact Group)" name="team_id">
-                <Input placeholder="default team (create a new team)" />
+              <Form.Item
+                label="Team ID"
+                name="team_id"
+                hidden={true}
+                initialValue={team ? team["team_id"] : null}
+                valuePropName="team_id"
+                className="mt-8"
+              >
+                <Input value={team ? team["team_alias"] : ""} disabled />
               </Form.Item>

-              <Form.Item label="Description" name="description">
-                <Input.TextArea placeholder="Enter description" rows={4} />
+              <Form.Item
+                label="Models"
+                name="models"
+                className="mb-12"
+                rules={[{ required: true, message: 'Please select a model' }]}
+                help="required"
+              >
+                <Select
+                  mode="multiple"
+                  placeholder="Select models"
+                  style={{ width: "100%" }}
+                  onChange={(values) => {
+                    const isAllTeamModelsSelected = values.includes("all-team-models");
+
+                    if (isAllTeamModelsSelected) {
+                      const newValues = ["all-team-models"];
+                      form.setFieldsValue({ models: newValues });
+                    }
+                  }}
+                >
+                  <Option key="all-team-models" value="all-team-models">
+                    All Team Models
+                  </Option>
+                  {team && team.models ? (
+                    team.models.includes("all-proxy-models") ? (
+                      userModels.map((model: string) => (
+                        (
+                          <Option key={model} value={model}>
+                            {model}
+                          </Option>
+                        )
+                      ))
+                    ) : (
+                      team.models.map((model: string) => (
+                        <Option key={model} value={model}>
+                          {model}
+                        </Option>
+                      ))
+                    )
+                  ) : (
+                    userModels.map((model: string) => (
+                      <Option key={model} value={model}>
+                        {model}
+                      </Option>
+                    ))
+                  )}
+                </Select>
               </Form.Item>
+
+              <Accordion className="mt-20 mb-8" >
+                <AccordionHeader>
+                  <b>Optional Settings</b>
+                </AccordionHeader>
+                <AccordionBody>
+                  <Form.Item
+                    className="mt-8"
+                    label="Max Budget (USD)"
+                    name="max_budget"
+                    help={`Budget cannot exceed team max budget: $${team?.max_budget !== null && team?.max_budget !== undefined ? team?.max_budget : 'unlimited'}`}
+                    rules={[
+                      {
+                        validator: async (_, value) => {
+                          if (value && team && team.max_budget !== null && value > team.max_budget) {
+                            throw new Error(`Budget cannot exceed team max budget: $${team.max_budget}`);
+                          }
+                        },
+                      },
+                    ]}
+                  >
+                    <InputNumber step={0.01} precision={2} width={200} />
+                  </Form.Item>
+                  <Form.Item
+                    className="mt-8"
+                    label="Reset Budget"
+                    name="budget_duration"
+                    help={`Team Reset Budget: ${team?.budget_duration !== null && team?.budget_duration !== undefined ? team?.budget_duration : 'None'}`}
+                  >
+                    <Select defaultValue={null} placeholder="n/a">
+                      <Select.Option value="24h">daily</Select.Option>
+                      <Select.Option value="30d">monthly</Select.Option>
+                    </Select>
+                  </Form.Item>
+                  <Form.Item
+                    className="mt-8"
+                    label="Tokens per minute Limit (TPM)"
+                    name="tpm_limit"
+                    help={`TPM cannot exceed team TPM limit: ${team?.tpm_limit !== null && team?.tpm_limit !== undefined ? team?.tpm_limit : 'unlimited'}`}
+                    rules={[
+                      {
+                        validator: async (_, value) => {
+                          if (value && team && team.tpm_limit !== null && value > team.tpm_limit) {
+                            throw new Error(`TPM limit cannot exceed team TPM limit: ${team.tpm_limit}`);
+                          }
+                        },
+                      },
+                    ]}
+                  >
+                    <InputNumber step={1} width={400} />
+                  </Form.Item>
+                  <Form.Item
+                    className="mt-8"
+                    label="Requests per minute Limit (RPM)"
+                    name="rpm_limit"
+                    help={`RPM cannot exceed team RPM limit: ${team?.rpm_limit !== null && team?.rpm_limit !== undefined ? team?.rpm_limit : 'unlimited'}`}
+                    rules={[
+                      {
+                        validator: async (_, value) => {
+                          if (value && team && team.rpm_limit !== null && value > team.rpm_limit) {
+                            throw new Error(`RPM limit cannot exceed team RPM limit: ${team.rpm_limit}`);
+                          }
+                        },
+                      },
+                    ]}
+                  >
+                    <InputNumber step={1} width={400} />
+                  </Form.Item>
+                  <Form.Item label="Expire Key (eg: 30s, 30h, 30d)" name="duration" className="mt-8">
+                    <Input />
+                  </Form.Item>
+                  <Form.Item label="Metadata" name="metadata">
+                    <Input.TextArea rows={4} placeholder="Enter metadata as JSON" />
+                  </Form.Item>
+                </AccordionBody>
+              </Accordion>

             </>
           )}
           <div style={{ textAlign: "right", marginTop: "10px" }}>
@@ -4,6 +4,7 @@ import { Select, SelectItem, Text, Title } from "@tremor/react";
 interface DashboardTeamProps {
   teams: Object[] | null;
   setSelectedTeam: React.Dispatch<React.SetStateAction<any | null>>;
+  userRole: string | null;
 }

 type TeamInterface = {

@@ -15,6 +16,7 @@ type TeamInterface = {
 const DashboardTeam: React.FC<DashboardTeamProps> = ({
   teams,
   setSelectedTeam,
+  userRole,
 }) => {
   const defaultTeam: TeamInterface = {
     models: [],

@@ -25,19 +27,26 @@ const DashboardTeam: React.FC<DashboardTeamProps> = ({

   const [value, setValue] = useState(defaultTeam);

-  const updatedTeams = teams ? [...teams, defaultTeam] : [defaultTeam];
+  let updatedTeams;
+  if (userRole === "App User") {
+    // Non-Admin SSO users should only see their own team - they should not see "Default Team"
+    updatedTeams = teams;
+  } else {
+    updatedTeams = teams ? [...teams, defaultTeam] : [defaultTeam];
+  }
+  if (userRole === 'App User') return null;

   return (
     <div className="mt-5 mb-5">
       <Title>Select Team</Title>

       <Text>
-        If you belong to multiple teams, this setting controls which team is
-        used by default when creating new API Keys.
+        If you belong to multiple teams, this setting controls which team is used by default when creating new API Keys.
       </Text>
       <Text className="mt-3 mb-3">
         <b>Default Team:</b> If no team_id is set for a key, it will be grouped under here.
       </Text>

       {updatedTeams && updatedTeams.length > 0 ? (
         <Select defaultValue="0">
           {updatedTeams.map((team: any, index) => (
@@ -46,8 +46,8 @@ const Sidebar: React.FC<SidebarProps> = ({
     );
   }
   return (
-    <Layout style={{ minHeight: "100vh", maxWidth: "120px" }}>
-      <Sider width={120}>
+    <Layout style={{ minHeight: "100vh", maxWidth: "130px" }}>
+      <Sider width={130}>
         <Menu
           mode="inline"
           defaultSelectedKeys={defaultSelectedKey ? defaultSelectedKey : ["1"]}

@@ -63,11 +63,23 @@ const Sidebar: React.FC<SidebarProps> = ({
               Test Key
             </Text>
           </Menu.Item>
+
+          <Menu.Item key="11" onClick={() => setPage("api_ref")}>
+            <Text>
+              API Reference
+            </Text>
+          </Menu.Item>
+
+          {
+            userRole == "Admin" ? (
           <Menu.Item key="2" onClick={() => setPage("models")}>
             <Text>
               Models
             </Text>
           </Menu.Item>
+            ) : null
+          }

           {userRole == "Admin" ? (
             <Menu.Item key="6" onClick={() => setPage("teams")}>
               <Text>

@@ -75,11 +87,18 @@ const Sidebar: React.FC<SidebarProps> = ({
             </Text>
           </Menu.Item>
         ) : null}
+
+        {
+          userRole == "Admin" ? (
           <Menu.Item key="4" onClick={() => setPage("usage")}>
             <Text>
               Usage
             </Text>
           </Menu.Item>
+          ) : null
+        }

         {userRole == "Admin" ? (
           <Menu.Item key="5" onClick={() => setPage("users")}>
             <Text>

@@ -87,16 +106,27 @@ const Sidebar: React.FC<SidebarProps> = ({
             </Text>
           </Menu.Item>
         ) : null}
+
+        {
+          userRole == "Admin" ? (
           <Menu.Item key="8" onClick={() => setPage("settings")}>
             <Text>
               Integrations
             </Text>
           </Menu.Item>
+          ) : null
+        }
+
+        {
+          userRole == "Admin" ? (
           <Menu.Item key="9" onClick={() => setPage("general-settings")}>
             <Text>
               Settings
             </Text>
           </Menu.Item>
+          ) : null
+        }

         {userRole == "Admin" ? (
           <Menu.Item key="7" onClick={() => setPage("admin-panel")}>
             <Text>
@@ -296,6 +296,9 @@ export const userInfoCall = async (
   if (userRole == "App Owner" && userID) {
     url = `${url}?user_id=${userID}`;
   }
+  if (userRole == "App User" && userID) {
+    url = `${url}?user_id=${userID}`;
+  }
   console.log("in userInfoCall viewAll=", viewAll);
   if (viewAll && page_size && (page != null) && (page != undefined)) {
     url = `${url}?view_all=true&page=${page}&page_size=${page_size}`;
@@ -5,6 +5,7 @@ import { Grid, Col, Card, Text, Title } from "@tremor/react";
 import CreateKey from "./create_key_button";
 import ViewKeyTable from "./view_key_table";
 import ViewUserSpend from "./view_user_spend";
+import ViewUserTeam from "./view_user_team";
 import DashboardTeam from "./dashboard_default_team";
 import { useSearchParams, useRouter } from "next/navigation";
 import { jwtDecode } from "jwt-decode";

@@ -232,11 +233,19 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
       <div className="w-full mx-4">
         <Grid numItems={1} className="gap-2 p-8 h-[75vh] w-full mt-2">
           <Col numColSpan={1}>
+            <ViewUserTeam
+              userID={userID}
+              userRole={userRole}
+              selectedTeam={selectedTeam ? selectedTeam : null}
+              accessToken={accessToken}
+            />
             <ViewUserSpend
               userID={userID}
               userRole={userRole}
               accessToken={accessToken}
               userSpend={teamSpend}
+              selectedTeam = {selectedTeam ? selectedTeam : null}
             />

             <ViewKeyTable

@@ -257,7 +266,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
               data={keys}
               setData={setKeys}
             />
-            <DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} />
+            <DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} userRole={userRole}/>
           </Col>
         </Grid>
       </div>
ui/litellm-dashboard/src/components/view_user_spend.tsx

@@ -2,7 +2,7 @@
 import React, { useEffect, useState } from "react";
 import { keyDeleteCall, getTotalSpendCall } from "./networking";
 import { StatusOnlineIcon, TrashIcon } from "@heroicons/react/outline";
-import { DonutChart } from "@tremor/react";
+import { Accordion, AccordionHeader, AccordionList, DonutChart } from "@tremor/react";
 import {
   Badge,
   Card,

@@ -16,9 +16,13 @@ import {
   Text,
   Title,
   Icon,
+  AccordionBody,
+  List,
+  ListItem,
+
 } from "@tremor/react";
 import { Statistic } from "antd"
-import { spendUsersCall } from "./networking";
+import { spendUsersCall, modelAvailableCall } from "./networking";


 // Define the props type

@@ -32,11 +36,13 @@ interface ViewUserSpendProps {
   userRole: string | null;
   accessToken: string | null;
   userSpend: number | null;
+  selectedTeam: any | null;
 }
-const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessToken, userSpend }) => {
+const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessToken, userSpend, selectedTeam }) => {
   console.log(`userSpend: ${userSpend}`)
   let [spend, setSpend] = useState(userSpend !== null ? userSpend : 0.0);
   const [maxBudget, setMaxBudget] = useState(0.0);
+  const [userModels, setUserModels] = useState([]);
   useEffect(() => {
     const fetchData = async () => {
       if (!accessToken || !userID || !userRole) {

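After these hunks the file carries two separate import statements from `@tremor/react`, and `AccordionList` does not appear anywhere in the rendered output shown below. A consolidated import would keep only the names actually used; a sketch, assuming no other usages exist elsewhere in the file:

```typescript
import {
  Badge,
  Card,
  Text,
  Title,
  Icon,
  Accordion,
  AccordionHeader,
  AccordionBody,
  List,
  ListItem,
  DonutChart,
} from "@tremor/react";
```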
@@ -62,9 +68,30 @@ const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessT
         }
       }
     };
+    const fetchUserModels = async () => {
+      try {
+        if (userID === null || userRole === null) {
+          return;
+        }
+
+        if (accessToken !== null) {
+          const model_available = await modelAvailableCall(accessToken, userID, userRole);
+          let available_model_names = model_available["data"].map(
+            (element: { id: string }) => element.id
+          );
+          console.log("available_model_names:", available_model_names);
+          setUserModels(available_model_names);
+        }
+      } catch (error) {
+        console.error("Error fetching user models:", error);
+      }
+    };
+
+    fetchUserModels();
     fetchData();
-  }, [userRole, accessToken]);
+  }, [userRole, accessToken, userID]);



   useEffect(() => {
     if (userSpend !== null) {

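The `.map()` over `model_available["data"]` implies a response shape like the one below; this is inferred from the usage in this hunk, and only the `id` field is relied on. A typed sketch of the same call:

```typescript
// Shape inferred from the .map() above (hypothetical type, not in the commit).
interface ModelAvailableResponse {
  data: { id: string }[];
}

const model_available: ModelAvailableResponse =
  await modelAvailableCall(accessToken, userID, userRole);
const available_model_names = model_available.data.map((m) => m.id);
```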
@@ -72,18 +99,50 @@ const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessT
     }
   }, [userSpend])

+  // logic to decide what models to display
+  let modelsToDisplay = [];
+  if (selectedTeam && selectedTeam.models) {
+    modelsToDisplay = selectedTeam.models;
+  }
+
+  // check if "all-proxy-models" is in modelsToDisplay
+  if (modelsToDisplay && modelsToDisplay.includes("all-proxy-models")) {
+    console.log("user models:", userModels);
+    modelsToDisplay = userModels;
+  }
+
+
   const displayMaxBudget = maxBudget !== null ? `$${maxBudget} limit` : "No limit";

   const roundedSpend = spend !== undefined ? spend.toFixed(4) : null;

   console.log(`spend in view user spend: ${spend}`)
   return (
-    <>
-      <p className="text-tremor-default text-tremor-content dark:text-dark-tremor-content">Total Spend </p>
-      <p className="text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">${roundedSpend}</p>
-    </>
-  )
+    <div className="flex items-center">
+      <div>
+        <p className="text-tremor-default text-tremor-content dark:text-dark-tremor-content">
+          Total Spend{" "}
+        </p>
+        <p className="text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">
+          ${roundedSpend}
+        </p>
+      </div>
+      <div className="ml-auto">
+        <Accordion>
+          <AccordionHeader>Models</AccordionHeader>
+          <AccordionBody className="absolute right-0 z-10 bg-white p-2 shadow-lg max-w-xs">
+            <List>
+              {modelsToDisplay.map((model: string) => (
+                <ListItem key={model}>
+                  <Text>{model}</Text>
+                </ListItem>
+              ))}
+            </List>
+          </AccordionBody>
+        </Accordion>
+      </div>
+    </div>
+  );
 }

 export default ViewUserSpend;

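The `selectedTeam.models` / `"all-proxy-models"` resolution above is duplicated verbatim in the new `view_user_team.tsx` below. It could be factored into a small shared helper; a sketch under the assumption that a shared module is acceptable, with hypothetical naming:

```typescript
// Hypothetical shared helper: resolve which models to show for a team.
// "all-proxy-models" is treated as a sentinel meaning "every model this
// user can access", per the duplicated logic in both components.
export function resolveModelsToDisplay(
  selectedTeam: { models?: string[] } | null,
  userModels: string[]
): string[] {
  const teamModels = selectedTeam?.models ?? [];
  if (teamModels.includes("all-proxy-models")) {
    return userModels;
  }
  return teamModels;
}
```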
ui/litellm-dashboard/src/components/view_user_team.tsx (new file, 78 lines)

@@ -0,0 +1,78 @@
+"use client";
+import React, { useEffect, useState } from "react";
+import {
+  Badge,
+  Card,
+  Table,
+  Metric,
+  TableBody,
+  TableCell,
+  TableHead,
+  TableHeaderCell,
+  TableRow,
+  Text,
+  Title,
+  Icon,
+  Accordion,
+  AccordionBody,
+  AccordionHeader,
+  List,
+  ListItem,
+} from "@tremor/react";
+import { Statistic } from "antd"
+import { modelAvailableCall } from "./networking";
+
+
+interface ViewUserTeamProps {
+  userID: string | null;
+  userRole: string | null;
+  selectedTeam: any | null;
+  accessToken: string | null;
+}
+const ViewUserTeam: React.FC<ViewUserTeamProps> = ({ userID, userRole, selectedTeam, accessToken }) => {
+  const [userModels, setUserModels] = useState([]);
+  useEffect(() => {
+    const fetchUserModels = async () => {
+      try {
+        if (userID === null || userRole === null) {
+          return;
+        }
+
+        if (accessToken !== null) {
+          const model_available = await modelAvailableCall(accessToken, userID, userRole);
+          let available_model_names = model_available["data"].map(
+            (element: { id: string }) => element.id
+          );
+          console.log("available_model_names:", available_model_names);
+          setUserModels(available_model_names);
+        }
+      } catch (error) {
+        console.error("Error fetching user models:", error);
+      }
+    };
+
+    fetchUserModels();
+  }, [accessToken, userID, userRole]);
+
+  // logic to decide what models to display
+  let modelsToDisplay = [];
+  if (selectedTeam && selectedTeam.models) {
+    modelsToDisplay = selectedTeam.models;
+  }
+
+  // check if "all-proxy-models" is in modelsToDisplay
+  if (modelsToDisplay && modelsToDisplay.includes("all-proxy-models")) {
+    console.log("user models:", userModels);
+    modelsToDisplay = userModels;
+  }
+
+  return (
+    <>
+      <div className="mb-5">
+        <p className="text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">{selectedTeam?.team_alias}</p>
+      </div>
+    </>
+  )
+}
+
+export default ViewUserTeam;
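As committed, this component fetches `userModels` and computes `modelsToDisplay` but renders only `selectedTeam?.team_alias`. If the list is meant to be shown here as well, the return could reuse the same Tremor `List` pattern as `view_user_spend.tsx`; a sketch, not part of this commit:

```typescript
return (
  <div className="mb-5">
    <p className="text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">
      {selectedTeam?.team_alias}
    </p>
    <List>
      {modelsToDisplay.map((model: string) => (
        <ListItem key={model}>
          <Text>{model}</Text>
        </ListItem>
      ))}
    </List>
  </div>
);
```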