Merge branch 'BerriAI:main' into feature/watsonx-integration

Simon S. Viloria 2024-04-21 10:35:51 +02:00 committed by GitHub
commit a77537ddd4
45 changed files with 1027 additions and 281 deletions


@@ -279,7 +279,7 @@ router_settings:
```
</TabItem>
-<TabItem value="simple-shuffle" label="(Default) Weighted Pick">
+<TabItem value="simple-shuffle" label="(Default) Weighted Pick (Async)">

**Default** Picks a deployment based on the provided **Requests per minute (rpm) or Tokens per minute (tpm)**
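To make the weighted pick described above concrete, here is a minimal sketch of using the router with two rpm-weighted deployments. Model names, keys, and rpm values are placeholders, and provider API keys are assumed to be set in the environment; this is not part of the commit itself.

```python
from litellm import Router

# Two deployments of the same model group; with "simple-shuffle" the router
# makes a weighted random pick based on the declared rpm (~25% vs ~75% here).
model_list = [
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "gpt-3.5-turbo", "rpm": 100},
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "gpt-3.5-turbo", "rpm": 300},
    },
]

router = Router(model_list=model_list, routing_strategy="simple-shuffle")

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
)
```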


@@ -19,7 +19,7 @@ class PrometheusLogger:
        **kwargs,
    ):
        try:
-            verbose_logger.debug(f"in init prometheus metrics")
+            print(f"in init prometheus metrics")
            from prometheus_client import Counter

            self.litellm_llm_api_failed_requests_metric = Counter(


@@ -44,9 +44,18 @@ class PrometheusServicesLogger:
            )  # store the prometheus histogram/counter we need to call for each field in payload
            for service in self.services:
-                histogram = self.create_histogram(service)
-                counter = self.create_counter(service)
-                self.payload_to_prometheus_map[service] = [histogram, counter]
+                histogram = self.create_histogram(service, type_of_request="latency")
+                counter_failed_request = self.create_counter(
+                    service, type_of_request="failed_requests"
+                )
+                counter_total_requests = self.create_counter(
+                    service, type_of_request="total_requests"
+                )
+                self.payload_to_prometheus_map[service] = [
+                    histogram,
+                    counter_failed_request,
+                    counter_total_requests,
+                ]

            self.prometheus_to_amount_map: dict = (
                {}

@@ -74,26 +83,26 @@ class PrometheusServicesLogger:
                return metric
        return None

-    def create_histogram(self, label: str):
-        metric_name = "litellm_{}_latency".format(label)
+    def create_histogram(self, service: str, type_of_request: str):
+        metric_name = "litellm_{}_{}".format(service, type_of_request)
        is_registered = self.is_metric_registered(metric_name)
        if is_registered:
            return self.get_metric(metric_name)
        return self.Histogram(
            metric_name,
-            "Latency for {} service".format(label),
-            labelnames=[label],
+            "Latency for {} service".format(service),
+            labelnames=[service],
        )

-    def create_counter(self, label: str):
-        metric_name = "litellm_{}_failed_requests".format(label)
+    def create_counter(self, service: str, type_of_request: str):
+        metric_name = "litellm_{}_{}".format(service, type_of_request)
        is_registered = self.is_metric_registered(metric_name)
        if is_registered:
            return self.get_metric(metric_name)
        return self.Counter(
            metric_name,
-            "Total failed requests for {} service".format(label),
-            labelnames=[label],
+            "Total {} for {} service".format(type_of_request, service),
+            labelnames=[service],
        )

    def observe_histogram(

@@ -120,6 +129,8 @@ class PrometheusServicesLogger:
        if self.mock_testing:
            self.mock_testing_success_calls += 1

+        print(f"payload call type: {payload.call_type}")
        if payload.service.value in self.payload_to_prometheus_map:
            prom_objects = self.payload_to_prometheus_map[payload.service.value]
            for obj in prom_objects:

@@ -129,11 +140,19 @@ class PrometheusServicesLogger:
                        labels=payload.service.value,
                        amount=payload.duration,
                    )
+                elif isinstance(obj, self.Counter) and "total_requests" in obj._name:
+                    self.increment_counter(
+                        counter=obj,
+                        labels=payload.service.value,
+                        amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS
+                    )

    def service_failure_hook(self, payload: ServiceLoggerPayload):
        if self.mock_testing:
            self.mock_testing_failure_calls += 1

+        print(f"payload call type: {payload.call_type}")
        if payload.service.value in self.payload_to_prometheus_map:
            prom_objects = self.payload_to_prometheus_map[payload.service.value]
            for obj in prom_objects:

@@ -141,7 +160,7 @@ class PrometheusServicesLogger:
                    self.increment_counter(
                        counter=obj,
                        labels=payload.service.value,
-                        amount=1, # LOG ERROR COUNT TO PROMETHEUS
+                        amount=1, # LOG ERROR COUNT / TOTAL REQUESTS TO PROMETHEUS
                    )

    async def async_service_success_hook(self, payload: ServiceLoggerPayload):

@@ -151,6 +170,8 @@ class PrometheusServicesLogger:
        if self.mock_testing:
            self.mock_testing_success_calls += 1

+        print(f"payload call type: {payload.call_type}")
        if payload.service.value in self.payload_to_prometheus_map:
            prom_objects = self.payload_to_prometheus_map[payload.service.value]
            for obj in prom_objects:

@@ -160,12 +181,20 @@ class PrometheusServicesLogger:
                        labels=payload.service.value,
                        amount=payload.duration,
                    )
+                elif isinstance(obj, self.Counter) and "total_requests" in obj._name:
+                    self.increment_counter(
+                        counter=obj,
+                        labels=payload.service.value,
+                        amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS
+                    )

    async def async_service_failure_hook(self, payload: ServiceLoggerPayload):
        print(f"received error payload: {payload.error}")
        if self.mock_testing:
            self.mock_testing_failure_calls += 1

+        print(f"payload call type: {payload.call_type}")
        if payload.service.value in self.payload_to_prometheus_map:
            prom_objects = self.payload_to_prometheus_map[payload.service.value]
            for obj in prom_objects:
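As a reference for what the refactor above emits, here is a small standalone sketch using `prometheus_client` directly with the new `litellm_{service}_{type_of_request}` naming scheme. The service name `redis` and the latency value are illustrative assumptions, not taken from the commit.

```python
from prometheus_client import Counter, Histogram

service = "redis"  # illustrative service name

# One histogram + two counters per service, mirroring the naming in the diff
latency_histogram = Histogram(
    f"litellm_{service}_latency",
    f"Latency for {service} service",
    labelnames=[service],
)
failed_requests_counter = Counter(
    f"litellm_{service}_failed_requests",
    f"Total failed_requests for {service} service",
    labelnames=[service],
)
total_requests_counter = Counter(
    f"litellm_{service}_total_requests",
    f"Total total_requests for {service} service",
    labelnames=[service],
)

# On a successful call: observe latency and count the request
latency_histogram.labels(service).observe(0.042)
total_requests_counter.labels(service).inc()

# On a failed call: both counters are incremented (per the failure hook above)
failed_requests_counter.labels(service).inc()
total_requests_counter.labels(service).inc()
```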


@@ -507,10 +507,11 @@ def construct_tool_use_system_prompt(
):  # from https://github.com/anthropics/anthropic-cookbook/blob/main/function_calling/function_calling.ipynb
    tool_str_list = []
    for tool in tools:
+        tool_function = get_attribute_or_key(tool, "function")
        tool_str = construct_format_tool_for_claude_prompt(
-            tool["function"]["name"],
-            tool["function"].get("description", ""),
-            tool["function"].get("parameters", {}),
+            get_attribute_or_key(tool_function, "name"),
+            get_attribute_or_key(tool_function, "description", ""),
+            get_attribute_or_key(tool_function, "parameters", {}),
        )
        tool_str_list.append(tool_str)
    tool_use_system_prompt = (

@@ -634,7 +635,8 @@ def convert_to_anthropic_tool_result_xml(message: dict) -> str:
    </function_results>
    """
    name = message.get("name")
-    content = message.get("content")
+    content = message.get("content", "")
+    content = content.replace("<", "&lt;").replace(">", "&gt;").replace("&", "&amp;")

    # We can't determine from openai message format whether it's a successful or
    # error call result so default to the successful result template

@@ -655,13 +657,15 @@ def convert_to_anthropic_tool_result_xml(message: dict) -> str:
def convert_to_anthropic_tool_invoke_xml(tool_calls: list) -> str:
    invokes = ""
    for tool in tool_calls:
-        if tool["type"] != "function":
+        if get_attribute_or_key(tool, "type") != "function":
            continue

-        tool_name = tool["function"]["name"]
+        tool_function = get_attribute_or_key(tool,"function")
+        tool_name = get_attribute_or_key(tool_function, "name")
+        tool_arguments = get_attribute_or_key(tool_function, "arguments")
        parameters = "".join(
            f"<{param}>{val}</{param}>\n"
-            for param, val in json.loads(tool["function"]["arguments"]).items()
+            for param, val in json.loads(tool_arguments).items()
        )
        invokes += (
            "<invoke>\n"

@@ -715,7 +719,7 @@ def anthropic_messages_pt_xml(messages: list):
                        {
                            "type": "text",
                            "text": (
-                                convert_to_anthropic_tool_result(messages[msg_i])
+                                convert_to_anthropic_tool_result_xml(messages[msg_i])
                                if messages[msg_i]["role"] == "tool"
                                else messages[msg_i]["content"]
                            ),

@@ -736,7 +740,7 @@ def anthropic_messages_pt_xml(messages: list):
            if messages[msg_i].get(
                "tool_calls", []
            ):  # support assistant tool invoke convertion
-                assistant_text += convert_to_anthropic_tool_invoke(  # type: ignore
+                assistant_text += convert_to_anthropic_tool_invoke_xml(  # type: ignore
                    messages[msg_i]["tool_calls"]
                )

@@ -848,12 +852,12 @@ def convert_to_anthropic_tool_invoke(tool_calls: list) -> list:
    anthropic_tool_invoke = [
        {
            "type": "tool_use",
-            "id": tool["id"],
-            "name": tool["function"]["name"],
-            "input": json.loads(tool["function"]["arguments"]),
+            "id": get_attribute_or_key(tool, "id"),
+            "name": get_attribute_or_key(get_attribute_or_key(tool, "function"), "name"),
+            "input": json.loads(get_attribute_or_key(get_attribute_or_key(tool, "function"), "arguments")),
        }
        for tool in tool_calls
-        if tool["type"] == "function"
+        if get_attribute_or_key(tool, "type") == "function"
    ]

    return anthropic_tool_invoke

@@ -1074,7 +1078,8 @@ def cohere_message_pt(messages: list):
            tool_result = convert_openai_message_to_cohere_tool_result(message)
            tool_results.append(tool_result)
        else:
-            prompt += message["content"]
+            prompt += message["content"] + "\n\n"
+    prompt = prompt.rstrip()

    return prompt, tool_results

@@ -1414,3 +1419,8 @@ def prompt_factory(
        return default_pt(
            messages=messages
        )  # default that covers Bloom, T-5, any non-chat tuned model (e.g. base Llama2)
+
+
+def get_attribute_or_key(tool_or_function, attribute, default=None):
+    if hasattr(tool_or_function, attribute):
+        return getattr(tool_or_function, attribute)
+    return tool_or_function.get(attribute, default)
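The `get_attribute_or_key` helper added at the end of this file lets the Anthropic prompt helpers accept tool calls either as plain dicts (request format) or as object-style tool calls. A brief illustration; the sample objects below are made up for the example and are not from the commit.

```python
from types import SimpleNamespace

def get_attribute_or_key(tool_or_function, attribute, default=None):
    if hasattr(tool_or_function, attribute):
        return getattr(tool_or_function, attribute)
    return tool_or_function.get(attribute, default)

# dict-style tool call (OpenAI request format)
tool_as_dict = {"type": "function", "function": {"name": "get_weather"}}

# object-style tool call (e.g. an SDK response object)
tool_as_obj = SimpleNamespace(
    type="function", function=SimpleNamespace(name="get_weather")
)

for tool in (tool_as_dict, tool_as_obj):
    fn = get_attribute_or_key(tool, "function")
    print(get_attribute_or_key(fn, "name"))  # -> "get_weather" in both cases
```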


@@ -123,7 +123,7 @@ class VertexAIAnthropicConfig:
    """
    - Run client init
    - Support async completion, streaming
    """

@@ -236,17 +236,19 @@ def completion(
        if client is None:
            if vertex_credentials is not None and isinstance(vertex_credentials, str):
                import google.oauth2.service_account

-                json_obj = json.loads(vertex_credentials)
                creds = (
                    google.oauth2.service_account.Credentials.from_service_account_info(
-                        json_obj,
+                        json.loads(vertex_credentials),
                        scopes=["https://www.googleapis.com/auth/cloud-platform"],
                    )
                )
                ### CHECK IF ACCESS
                access_token = refresh_auth(credentials=creds)
+            else:
+                import google.auth
+
+                creds, _ = google.auth.default()
+                ### CHECK IF ACCESS
+                access_token = refresh_auth(credentials=creds)

            vertex_ai_client = AnthropicVertex(
                project_id=vertex_project,
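For context, a standalone sketch of the credential resolution the new `else:` branch enables: use an inline service-account JSON when `vertex_credentials` is provided, otherwise fall back to Application Default Credentials. The function name and structure here are illustrative, not the library's API.

```python
import json
from typing import Optional

import google.auth
import google.oauth2.service_account

def load_vertex_credentials(vertex_credentials: Optional[str]):
    """Return Google credentials for the AnthropicVertex client (sketch)."""
    if vertex_credentials is not None and isinstance(vertex_credentials, str):
        # Inline service-account JSON passed by the caller
        return google.oauth2.service_account.Credentials.from_service_account_info(
            json.loads(vertex_credentials),
            scopes=["https://www.googleapis.com/auth/cloud-platform"],
        )
    # Fall back to Application Default Credentials (gcloud, metadata server, ...)
    creds, _project = google.auth.default()
    return creds
```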


@@ -610,6 +610,7 @@ def completion(
            "client",
            "rpm",
            "tpm",
+            "max_parallel_requests",
            "input_cost_per_token",
            "output_cost_per_token",
            "input_cost_per_second",

@@ -2598,6 +2599,7 @@ def embedding(
    client = kwargs.pop("client", None)
    rpm = kwargs.pop("rpm", None)
    tpm = kwargs.pop("tpm", None)
+    max_parallel_requests = kwargs.pop("max_parallel_requests", None)
    model_info = kwargs.get("model_info", None)
    metadata = kwargs.get("metadata", None)
    encoding_format = kwargs.get("encoding_format", None)

@@ -2655,6 +2657,7 @@ def embedding(
        "client",
        "rpm",
        "tpm",
+        "max_parallel_requests",
        "input_cost_per_token",
        "output_cost_per_token",
        "input_cost_per_second",

@@ -3514,6 +3517,7 @@ def image_generation(
        "client",
        "rpm",
        "tpm",
+        "max_parallel_requests",
        "input_cost_per_token",
        "output_cost_per_token",
        "hf_model_name",

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1 +1 @@
(Prerendered "LiteLLM Dashboard" index.html: a single minified line of Next.js HTML, shown before and after. The only changes are the page chunk reference, static/chunks/app/page-93ac11fb17dce9d6.js -> static/chunks/app/page-6ba29bc4256320f4.js (module 16586 -> 65249), and the buildId, "Oe7aA-U7OV9Y13gspREJQ" -> "Vjlnu8AomhCFg4fkGtcUs".)


@@ -1,7 +1,7 @@
2:I[77831,[],""]
-3:I[16586,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-93ac11fb17dce9d6.js"],""]
+3:I[65249,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-6ba29bc4256320f4.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["Oe7aA-U7OV9Y13gspREJQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 0:["Vjlnu8AomhCFg4fkGtcUs",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null


@@ -4,14 +4,12 @@ model_list:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-      # api_base: http://0.0.0.0:8080
      stream_timeout: 0.001
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/my-fake-model-2
      api_key: my-fake-key
      api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-      # api_base: http://0.0.0.0:8080
      stream_timeout: 0.001
  - litellm_params:
      model: azure/chatgpt-v-2

@@ -30,13 +28,6 @@ model_list:
#     api_key: my-fake-key
#     api_base: https://exampleopenaiendpoint-production.up.railway.app/

-# litellm_settings:
-#   success_callback: ["prometheus"]
-#   failure_callback: ["prometheus"]
-#   service_callback: ["prometheus_system"]
-#   upperbound_key_generate_params:
-#     max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET

router_settings:
  routing_strategy: usage-based-routing-v2
  # redis_url: "os.environ/REDIS_URL"

@@ -48,6 +39,10 @@ router_settings:
litellm_settings:
  num_retries: 3 # retry call 3 times on each model_name
  allowed_fails: 3 # cooldown model if it fails > 1 call in a minute.
+  success_callback: ["prometheus"]
+  failure_callback: ["prometheus"]
+  service_callback: ["prometheus_system"]

general_settings:
  alerting: ["slack"]


@@ -87,6 +87,14 @@ class LiteLLMRoutes(enum.Enum):
        "/v2/key/info",
    ]

+    sso_only_routes: List = [
+        "/key/generate",
+        "/key/update",
+        "/key/delete",
+        "/global/spend/logs",
+        "/global/predict/spend/logs",
+    ]
+
    management_routes: List = [  # key
        "/key/generate",
        "/key/update",


@@ -1053,6 +1053,11 @@ async def user_api_key_auth(
                        status_code=status.HTTP_403_FORBIDDEN,
                        detail="key not allowed to access this team's info",
                    )
+                elif (
+                    _has_user_setup_sso()
+                    and route in LiteLLMRoutes.sso_only_routes.value
+                ):
+                    pass
                else:
                    raise Exception(
                        f"Only master key can be used to generate, delete, update info for new keys/users/teams. Route={route}"

@@ -1102,6 +1107,13 @@ async def user_api_key_auth(
                return UserAPIKeyAuth(
                    api_key=api_key, user_role="proxy_admin", **valid_token_dict
                )
+            elif (
+                _has_user_setup_sso()
+                and route in LiteLLMRoutes.sso_only_routes.value
+            ):
+                return UserAPIKeyAuth(
+                    api_key=api_key, user_role="app_owner", **valid_token_dict
+                )
            else:
                raise Exception(
                    f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"

@@ -5721,6 +5733,20 @@ async def new_user(data: NewUserRequest):
            "user"  # only create a user, don't create key if 'auto_create_key' set to False
        )
    response = await generate_key_helper_fn(**data_json)

+    # Admin UI Logic
+    # if team_id passed add this user to the team
+    if data_json.get("team_id", None) is not None:
+        await team_member_add(
+            data=TeamMemberAddRequest(
+                team_id=data_json.get("team_id", None),
+                member=Member(
+                    user_id=data_json.get("user_id", None),
+                    role="user",
+                    user_email=data_json.get("user_email", None),
+                ),
+            )
+        )
+
    return NewUserResponse(
        key=response.get("token", ""),
        expires=response.get("expires", None),

@@ -6526,13 +6552,20 @@ async def team_member_add(
    existing_team_row = await prisma_client.get_data(  # type: ignore
        team_id=data.team_id, table_name="team", query_type="find_unique"
    )
+    if existing_team_row is None:
+        raise HTTPException(
+            status_code=404,
+            detail={
+                "error": f"Team not found for team_id={getattr(data, 'team_id', None)}"
+            },
+        )

    new_member = data.member

    existing_team_row.members_with_roles.append(new_member)

    complete_team_data = LiteLLM_TeamTable(
-        **existing_team_row.model_dump(),
+        **_get_pydantic_json_dict(existing_team_row),
    )

    team_row = await prisma_client.update_data(
@@ -8120,36 +8153,33 @@ async def auth_callback(request: Request):
            }
            user_role = getattr(user_info, "user_role", None)
-        else:
-            ## check if user-email in db ##
-            user_info = await prisma_client.db.litellm_usertable.find_first(
-                where={"user_email": user_email}
-            )
-            if user_info is not None:
-                user_defined_values = {
-                    "models": getattr(user_info, "models", user_id_models),
-                    "user_id": getattr(user_info, "user_id", user_id),
-                    "user_email": getattr(user_info, "user_id", user_email),
-                    "user_role": getattr(user_info, "user_role", None),
-                }
-                user_role = getattr(user_info, "user_role", None)
-
-                # update id
-                await prisma_client.db.litellm_usertable.update_many(
-                    where={"user_email": user_email}, data={"user_id": user_id}  # type: ignore
-                )
-            elif litellm.default_user_params is not None and isinstance(
-                litellm.default_user_params, dict
-            ):
-                user_defined_values = {
-                    "models": litellm.default_user_params.get(
-                        "models", user_id_models
-                    ),
-                    "user_id": litellm.default_user_params.get("user_id", user_id),
-                    "user_email": litellm.default_user_params.get(
-                        "user_email", user_email
-                    ),
-                }
+        ## check if user-email in db ##
+        user_info = await prisma_client.db.litellm_usertable.find_first(
+            where={"user_email": user_email}
+        )
+        if user_info is not None:
+            user_defined_values = {
+                "models": getattr(user_info, "models", user_id_models),
+                "user_id": getattr(user_info, "user_id", user_id),
+                "user_email": getattr(user_info, "user_id", user_email),
+                "user_role": getattr(user_info, "user_role", None),
+            }
+            user_role = getattr(user_info, "user_role", None)
+
+            # update id
+            await prisma_client.db.litellm_usertable.update_many(
+                where={"user_email": user_email}, data={"user_id": user_id}  # type: ignore
+            )
+        elif litellm.default_user_params is not None and isinstance(
+            litellm.default_user_params, dict
+        ):
+            user_defined_values = {
+                "models": litellm.default_user_params.get("models", user_id_models),
+                "user_id": litellm.default_user_params.get("user_id", user_id),
+                "user_email": litellm.default_user_params.get(
+                    "user_email", user_email
+                ),
+            }
    except Exception as e:
        pass


@@ -238,7 +238,10 @@ class ProxyLogging:
        litellm_params = kwargs.get("litellm_params", {})
        model = kwargs.get("model", "")
        api_base = litellm.get_api_base(model=model, optional_params=litellm_params)
-        messages = kwargs.get("messages", "")
+        messages = kwargs.get("messages", None)
+        # if messages does not exist fallback to "input"
+        if messages is None:
+            messages = kwargs.get("input", None)

        # only use first 100 chars for alerting
        _messages = str(messages)[:100]

@@ -282,7 +285,10 @@ class ProxyLogging:
        ):
            if request_data is not None:
                model = request_data.get("model", "")
-                messages = request_data.get("messages", "")
+                messages = request_data.get("messages", None)
+                if messages is None:
+                    # if messages does not exist fallback to "input"
+                    messages = request_data.get("input", None)
                trace_id = request_data.get("metadata", {}).get(
                    "trace_id", None
                )  # get langfuse trace id
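The fallback above exists because chat requests carry a `messages` list while embedding requests carry `input`. A short sketch of the lookup pattern with made-up request bodies (the model names and contents are only illustrative):

```python
chat_request = {"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]}
embedding_request = {"model": "text-embedding-ada-002", "input": ["hi"]}

for request_data in (chat_request, embedding_request):
    messages = request_data.get("messages", None)
    if messages is None:
        # if messages does not exist fallback to "input"
        messages = request_data.get("input", None)
    print(str(messages)[:100])  # only the first 100 chars are used for alerting
```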


@@ -26,7 +26,12 @@ from litellm.llms.custom_httpx.azure_dall_e_2 import (
    CustomHTTPTransport,
    AsyncCustomHTTPTransport,
)
-from litellm.utils import ModelResponse, CustomStreamWrapper, get_utc_datetime
+from litellm.utils import (
+    ModelResponse,
+    CustomStreamWrapper,
+    get_utc_datetime,
+    calculate_max_parallel_requests,
+)
import copy
from litellm._logging import verbose_router_logger
import logging

@@ -61,6 +66,7 @@ class Router:
        num_retries: int = 0,
        timeout: Optional[float] = None,
        default_litellm_params={},  # default params for Router.chat.completion.create
+        default_max_parallel_requests: Optional[int] = None,
        set_verbose: bool = False,
        debug_level: Literal["DEBUG", "INFO"] = "INFO",
        fallbacks: List = [],

@@ -198,6 +204,7 @@ class Router:
        )  # use a dual cache (Redis+In-Memory) for tracking cooldowns, usage, etc.
        self.default_deployment = None  # use this to track the users default deployment, when they want to use model = *
+        self.default_max_parallel_requests = default_max_parallel_requests

        if model_list:
            model_list = copy.deepcopy(model_list)

@@ -213,6 +220,7 @@ class Router:
        )  # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown
        self.num_retries = num_retries or litellm.num_retries or 0
        self.timeout = timeout or litellm.request_timeout
        self.retry_after = retry_after
        self.routing_strategy = routing_strategy
        self.fallbacks = fallbacks or litellm.fallbacks

@@ -298,7 +306,7 @@ class Router:
        else:
            litellm.failure_callback = [self.deployment_callback_on_failure]
        verbose_router_logger.info(
-            f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}"
+            f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter Redis Caching={self.cache.redis_cache}"
        )
        self.routing_strategy_args = routing_strategy_args
@@ -496,7 +504,9 @@ class Router:
            )

            rpm_semaphore = self._get_client(
-                deployment=deployment, kwargs=kwargs, client_type="rpm_client"
+                deployment=deployment,
+                kwargs=kwargs,
+                client_type="max_parallel_requests",
            )

            if rpm_semaphore is not None and isinstance(

@@ -681,7 +691,9 @@ class Router:
            ### CONCURRENCY-SAFE RPM CHECKS ###
            rpm_semaphore = self._get_client(
-                deployment=deployment, kwargs=kwargs, client_type="rpm_client"
+                deployment=deployment,
+                kwargs=kwargs,
+                client_type="max_parallel_requests",
            )

            if rpm_semaphore is not None and isinstance(

@@ -803,7 +815,9 @@ class Router:
            ### CONCURRENCY-SAFE RPM CHECKS ###
            rpm_semaphore = self._get_client(
-                deployment=deployment, kwargs=kwargs, client_type="rpm_client"
+                deployment=deployment,
+                kwargs=kwargs,
+                client_type="max_parallel_requests",
            )

            if rpm_semaphore is not None and isinstance(

@@ -1049,7 +1063,9 @@ class Router:
            )

            rpm_semaphore = self._get_client(
-                deployment=deployment, kwargs=kwargs, client_type="rpm_client"
+                deployment=deployment,
+                kwargs=kwargs,
+                client_type="max_parallel_requests",
            )

            if rpm_semaphore is not None and isinstance(

@@ -1243,7 +1259,9 @@ class Router:
            ### CONCURRENCY-SAFE RPM CHECKS ###
            rpm_semaphore = self._get_client(
-                deployment=deployment, kwargs=kwargs, client_type="rpm_client"
+                deployment=deployment,
+                kwargs=kwargs,
+                client_type="max_parallel_requests",
            )

            if rpm_semaphore is not None and isinstance(
@@ -1862,17 +1880,23 @@ class Router:
            model_id = model["model_info"]["id"]
            # ### IF RPM SET - initialize a semaphore ###
            rpm = litellm_params.get("rpm", None)
-            if rpm:
-                semaphore = asyncio.Semaphore(rpm)
-                cache_key = f"{model_id}_rpm_client"
+            tpm = litellm_params.get("tpm", None)
+            max_parallel_requests = litellm_params.get("max_parallel_requests", None)
+            calculated_max_parallel_requests = calculate_max_parallel_requests(
+                rpm=rpm,
+                max_parallel_requests=max_parallel_requests,
+                tpm=tpm,
+                default_max_parallel_requests=self.default_max_parallel_requests,
+            )
+            if calculated_max_parallel_requests:
+                semaphore = asyncio.Semaphore(calculated_max_parallel_requests)
+                cache_key = f"{model_id}_max_parallel_requests_client"
                self.cache.set_cache(
                    key=cache_key,
                    value=semaphore,
                    local_only=True,
                )
-                # print("STORES SEMAPHORE IN CACHE")

            #### for OpenAI / Azure we need to initalize the Client for High Traffic ########
            custom_llm_provider = litellm_params.get("custom_llm_provider")
            custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""

@@ -2537,8 +2561,8 @@ class Router:
            The appropriate client based on the given client_type and kwargs.
        """
        model_id = deployment["model_info"]["id"]
-        if client_type == "rpm_client":
-            cache_key = "{}_rpm_client".format(model_id)
+        if client_type == "max_parallel_requests":
+            cache_key = "{}_max_parallel_requests_client".format(model_id)
            client = self.cache.get_cache(key=cache_key, local_only=True)
            return client
        elif client_type == "async":

@@ -2778,6 +2802,7 @@ class Router:
        """
        if (
            self.routing_strategy != "usage-based-routing-v2"
+            and self.routing_strategy != "simple-shuffle"
        ):  # prevent regressions for other routing strategies, that don't have async get available deployments implemented.
            return self.get_available_deployment(
                model=model,

@@ -2828,6 +2853,25 @@ class Router:
                messages=messages,
                input=input,
            )
+        elif self.routing_strategy == "simple-shuffle":
+            # if users pass rpm or tpm, we do a random weighted pick - based on rpm/tpm
+            ############## Check if we can do a RPM/TPM based weighted pick #################
+            rpm = healthy_deployments[0].get("litellm_params").get("rpm", None)
+            if rpm is not None:
+                # use weight-random pick if rpms provided
+                rpms = [m["litellm_params"].get("rpm", 0) for m in healthy_deployments]
+                verbose_router_logger.debug(f"\nrpms {rpms}")
+                total_rpm = sum(rpms)
+                weights = [rpm / total_rpm for rpm in rpms]
+                verbose_router_logger.debug(f"\n weights {weights}")
+                # Perform weighted random pick
+                selected_index = random.choices(range(len(rpms)), weights=weights)[0]
+                verbose_router_logger.debug(f"\n selected index, {selected_index}")
+                deployment = healthy_deployments[selected_index]
+                verbose_router_logger.info(
+                    f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}"
+                )
+                return deployment or deployment[0]

        if deployment is None:
            verbose_router_logger.info(
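Putting the router changes together: `max_parallel_requests` can now be set per deployment, with `default_max_parallel_requests` as the router-wide fallback used to size each deployment's semaphore. The usage sketch below mirrors the new unit test later in this commit; the model name and limit values are placeholders.

```python
import asyncio
from typing import Optional
import litellm

deployment = {
    "model_name": "gpt-3.5-turbo",
    "litellm_params": {
        "model": "gpt-3.5-turbo",
        "max_parallel_requests": 10,  # per-deployment cap (placeholder value)
    },
    "model_info": {"id": "my-unique-id"},
}

router = litellm.Router(
    model_list=[deployment],
    default_max_parallel_requests=50,  # router-wide fallback (placeholder value)
)

# Retrieve the semaphore the router initialized for this deployment
mpr_semaphore: Optional[asyncio.Semaphore] = router._get_client(
    deployment=deployment,
    kwargs={},
    client_type="max_parallel_requests",
)
print(mpr_semaphore._value)  # -> 10, per the order mpr > rpm > tpm/1000/6 > default
```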


@@ -407,13 +407,15 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
            tpm_keys.append(tpm_key)
            rpm_keys.append(rpm_key)

-        tpm_values = await self.router_cache.async_batch_get_cache(
-            keys=tpm_keys
-        )  # [1, 2, None, ..]
-        rpm_values = await self.router_cache.async_batch_get_cache(
-            keys=rpm_keys
-        )  # [1, 2, None, ..]
+        combined_tpm_rpm_keys = tpm_keys + rpm_keys
+
+        combined_tpm_rpm_values = await self.router_cache.async_batch_get_cache(
+            keys=combined_tpm_rpm_keys
+        )  # [1, 2, None, ..]
+
+        tpm_values = combined_tpm_rpm_values[: len(tpm_keys)]
+        rpm_values = combined_tpm_rpm_values[len(tpm_keys) :]

        return self._common_checks_available_deployment(
            model_group=model_group,
            healthy_deployments=healthy_deployments,


@@ -269,6 +269,30 @@ def test_bedrock_claude_3_tool_calling():
        assert isinstance(
            response.choices[0].message.tool_calls[0].function.arguments, str
        )
+        messages.append(
+            response.choices[0].message.model_dump()
+        )  # Add assistant tool invokes
+        tool_result = (
+            '{"location": "Boston", "temperature": "72", "unit": "fahrenheit"}'
+        )
+        # Add user submitted tool results in the OpenAI format
+        messages.append(
+            {
+                "tool_call_id": response.choices[0].message.tool_calls[0].id,
+                "role": "tool",
+                "name": response.choices[0].message.tool_calls[0].function.name,
+                "content": tool_result,
+            }
+        )
+        # In the second response, Claude should deduce answer from tool results
+        second_response = completion(
+            model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+            messages=messages,
+            tools=tools,
+            tool_choice="auto",
+        )
+        print(f"second response: {second_response}")
+        assert isinstance(second_response.choices[0].message.content, str)
    except RateLimitError:
        pass
    except Exception as e:


@@ -120,6 +120,15 @@ async def test_new_user_response(prisma_client):
        await litellm.proxy.proxy_server.prisma_client.connect()
        from litellm.proxy.proxy_server import user_api_key_cache

+        await new_team(
+            NewTeamRequest(
+                team_id="ishaan-special-team",
+            ),
+            user_api_key_dict=UserAPIKeyAuth(
+                user_role="proxy_admin", api_key="sk-1234", user_id="1234"
+            ),
+        )
+
        _response = await new_user(
            data=NewUserRequest(
                models=["azure-gpt-3.5"],

@@ -999,10 +1008,32 @@ def test_generate_and_update_key(prisma_client):
    async def test():
        await litellm.proxy.proxy_server.prisma_client.connect()

+        # create team "litellm-core-infra@gmail.com""
+        print("creating team litellm-core-infra@gmail.com")
+        await new_team(
+            NewTeamRequest(
+                team_id="litellm-core-infra@gmail.com",
+            ),
+            user_api_key_dict=UserAPIKeyAuth(
+                user_role="proxy_admin", api_key="sk-1234", user_id="1234"
+            ),
+        )
+
+        await new_team(
+            NewTeamRequest(
+                team_id="ishaan-special-team",
+            ),
+            user_api_key_dict=UserAPIKeyAuth(
+                user_role="proxy_admin", api_key="sk-1234", user_id="1234"
+            ),
+        )
+
        request = NewUserRequest(
-            metadata={"team": "litellm-team3", "project": "litellm-project3"},
+            metadata={"project": "litellm-project3"},
            team_id="litellm-core-infra@gmail.com",
        )
        key = await new_user(request)
        print(key)

@@ -1015,7 +1046,6 @@ def test_generate_and_update_key(prisma_client):
        print("\n info for key=", result["info"])
        assert result["info"]["max_parallel_requests"] == None
        assert result["info"]["metadata"] == {
-            "team": "litellm-team3",
            "project": "litellm-project3",
        }
        assert result["info"]["team_id"] == "litellm-core-infra@gmail.com"

@@ -1037,7 +1067,7 @@ def test_generate_and_update_key(prisma_client):
        # update the team id
        response2 = await update_key_fn(
            request=Request,
-            data=UpdateKeyRequest(key=generated_key, team_id="ishaan"),
+            data=UpdateKeyRequest(key=generated_key, team_id="ishaan-special-team"),
        )
        print("response2=", response2)

@@ -1048,11 +1078,10 @@ def test_generate_and_update_key(prisma_client):
        print("\n info for key=", result["info"])
        assert result["info"]["max_parallel_requests"] == None
        assert result["info"]["metadata"] == {
-            "team": "litellm-team3",
            "project": "litellm-project3",
        }
        assert result["info"]["models"] == ["ada", "babbage", "curie", "davinci"]
-        assert result["info"]["team_id"] == "ishaan"
+        assert result["info"]["team_id"] == "ishaan-special-team"

        # cleanup - delete key
        delete_key_request = KeyRequest(keys=[generated_key])

@@ -1941,6 +1970,15 @@ async def test_master_key_hashing(prisma_client):
        await litellm.proxy.proxy_server.prisma_client.connect()
        from litellm.proxy.proxy_server import user_api_key_cache

+        await new_team(
+            NewTeamRequest(
+                team_id="ishaans-special-team",
+            ),
+            user_api_key_dict=UserAPIKeyAuth(
+                user_role="proxy_admin", api_key="sk-1234", user_id="1234"
+            ),
+        )
+
        _response = await new_user(
            data=NewUserRequest(
                models=["azure-gpt-3.5"],


@@ -81,7 +81,7 @@ def test_async_fallbacks(caplog):
    # Define the expected log messages
    # - error request, falling back notice, success notice
    expected_logs = [
-        "Intialized router with Routing strategy: simple-shuffle\n\nRouting fallbacks: [{'gpt-3.5-turbo': ['azure/gpt-3.5-turbo']}]\n\nRouting context window fallbacks: None",
+        "Intialized router with Routing strategy: simple-shuffle\n\nRouting fallbacks: [{'gpt-3.5-turbo': ['azure/gpt-3.5-turbo']}]\n\nRouting context window fallbacks: None\n\nRouter Redis Caching=None",
        "litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m",
        "Falling back to model_group = azure/gpt-3.5-turbo",
        "litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",


@ -0,0 +1,115 @@
# What is this?
## Unit tests for the max_parallel_requests feature on Router
import sys, os, time, inspect, asyncio, traceback
from datetime import datetime
import pytest
sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.utils import calculate_max_parallel_requests
from typing import Optional
"""
- only rpm
- only tpm
- only max_parallel_requests
- max_parallel_requests + rpm
- max_parallel_requests + tpm
- max_parallel_requests + tpm + rpm
"""
max_parallel_requests_values = [None, 10]
tpm_values = [None, 20, 300000]
rpm_values = [None, 30]
default_max_parallel_requests = [None, 40]
@pytest.mark.parametrize(
"max_parallel_requests, tpm, rpm, default_max_parallel_requests",
[
(mp, tp, rp, dmp)
for mp in max_parallel_requests_values
for tp in tpm_values
for rp in rpm_values
for dmp in default_max_parallel_requests
],
)
def test_scenario(max_parallel_requests, tpm, rpm, default_max_parallel_requests):
calculated_max_parallel_requests = calculate_max_parallel_requests(
max_parallel_requests=max_parallel_requests,
rpm=rpm,
tpm=tpm,
default_max_parallel_requests=default_max_parallel_requests,
)
if max_parallel_requests is not None:
assert max_parallel_requests == calculated_max_parallel_requests
elif rpm is not None:
assert rpm == calculated_max_parallel_requests
elif tpm is not None:
calculated_rpm = int(tpm / 1000 / 6)
if calculated_rpm == 0:
calculated_rpm = 1
print(
f"test calculated_rpm: {calculated_rpm}, calculated_max_parallel_requests={calculated_max_parallel_requests}"
)
assert calculated_rpm == calculated_max_parallel_requests
elif default_max_parallel_requests is not None:
assert calculated_max_parallel_requests == default_max_parallel_requests
else:
assert calculated_max_parallel_requests is None
@pytest.mark.parametrize(
"max_parallel_requests, tpm, rpm, default_max_parallel_requests",
[
(mp, tp, rp, dmp)
for mp in max_parallel_requests_values
for tp in tpm_values
for rp in rpm_values
for dmp in default_max_parallel_requests
],
)
def test_setting_mpr_limits_per_model(
max_parallel_requests, tpm, rpm, default_max_parallel_requests
):
deployment = {
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo",
"max_parallel_requests": max_parallel_requests,
"tpm": tpm,
"rpm": rpm,
},
"model_info": {"id": "my-unique-id"},
}
router = litellm.Router(
model_list=[deployment],
default_max_parallel_requests=default_max_parallel_requests,
)
mpr_client: Optional[asyncio.Semaphore] = router._get_client(
deployment=deployment,
kwargs={},
client_type="max_parallel_requests",
)
if max_parallel_requests is not None:
assert max_parallel_requests == mpr_client._value
elif rpm is not None:
assert rpm == mpr_client._value
elif tpm is not None:
calculated_rpm = int(tpm / 1000 / 6)
if calculated_rpm == 0:
calculated_rpm = 1
print(
f"test calculated_rpm: {calculated_rpm}, calculated_max_parallel_requests={mpr_client._value}"
)
assert calculated_rpm == mpr_client._value
elif default_max_parallel_requests is not None:
assert mpr_client._value == default_max_parallel_requests
else:
assert mpr_client is None
# raise Exception("it worked!")
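# Illustrative sketch (not part of this diff): the router sizes an asyncio.Semaphore
# with the value computed by calculate_max_parallel_requests, and that semaphore is
# what bounds concurrency (see mpr_client._value above). A minimal stand-alone
# version of the idea, reusing the imports at the top of this file:
async def _gated_call(i: int, sem: asyncio.Semaphore):
    async with sem:
        # at most `limit` of these coroutines run this section concurrently
        await asyncio.sleep(0.01)
        return i


async def _demo_semaphore():
    limit = calculate_max_parallel_requests(
        max_parallel_requests=None, rpm=None, tpm=300000, default_max_parallel_requests=None
    )  # -> 50 with the formula above
    sem = asyncio.Semaphore(limit)
    results = await asyncio.gather(*[_gated_call(i, sem) for i in range(100)])
    assert len(results) == 100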

View file

@ -5434,6 +5434,49 @@ def get_optional_params(
return optional_params return optional_params
def calculate_max_parallel_requests(
max_parallel_requests: Optional[int],
rpm: Optional[int],
tpm: Optional[int],
default_max_parallel_requests: Optional[int],
) -> Optional[int]:
"""
Returns the max parallel requests to send to a deployment.
Used to size the semaphore that gates async requests on the router.
Parameters:
- max_parallel_requests - Optional[int] - max_parallel_requests allowed for that deployment
- rpm - Optional[int] - requests per minute allowed for that deployment
- tpm - Optional[int] - tokens per minute allowed for that deployment
- default_max_parallel_requests - Optional[int] - default_max_parallel_requests allowed for any deployment
Returns:
- int or None (if all params are None)
Order:
max_parallel_requests > rpm > tpm / 1000 / 6 (Azure formula) > default_max_parallel_requests
Azure RPM formula:
6 rpm per 1000 TPM
https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits
"""
if max_parallel_requests is not None:
return max_parallel_requests
elif rpm is not None:
return rpm
elif tpm is not None:
calculated_rpm = int(tpm / 1000 / 6)
if calculated_rpm == 0:
calculated_rpm = 1
return calculated_rpm
elif default_max_parallel_requests is not None:
return default_max_parallel_requests
return None
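# Illustrative sketch (not part of this diff): how the precedence documented above
# resolves, using the same values the unit tests exercise.
assert calculate_max_parallel_requests(max_parallel_requests=10, rpm=30, tpm=300000, default_max_parallel_requests=40) == 10
assert calculate_max_parallel_requests(max_parallel_requests=None, rpm=30, tpm=300000, default_max_parallel_requests=40) == 30
assert calculate_max_parallel_requests(max_parallel_requests=None, rpm=None, tpm=300000, default_max_parallel_requests=40) == 50  # int(300000 / 1000 / 6)
assert calculate_max_parallel_requests(max_parallel_requests=None, rpm=None, tpm=20, default_max_parallel_requests=40) == 1  # rounded up from 0
assert calculate_max_parallel_requests(max_parallel_requests=None, rpm=None, tpm=None, default_max_parallel_requests=40) == 40
assert calculate_max_parallel_requests(max_parallel_requests=None, rpm=None, tpm=None, default_max_parallel_requests=None) is None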
def get_api_base(model: str, optional_params: dict) -> Optional[str]: def get_api_base(model: str, optional_params: dict) -> Optional[str]:
""" """
Returns the api base used for calling the model. Returns the api base used for calling the model.

View file

@ -96,9 +96,9 @@ litellm_settings:
router_settings: router_settings:
routing_strategy: usage-based-routing-v2 routing_strategy: usage-based-routing-v2
redis_host: os.environ/REDIS_HOST # redis_host: os.environ/REDIS_HOST
redis_password: os.environ/REDIS_PASSWORD # redis_password: os.environ/REDIS_PASSWORD
redis_port: os.environ/REDIS_PORT # redis_port: os.environ/REDIS_PORT
enable_pre_call_checks: true enable_pre_call_checks: true
general_settings: general_settings:

View file

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "litellm" name = "litellm"
version = "1.35.17" version = "1.35.18"
description = "Library to easily interface with LLM API providers" description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"] authors = ["BerriAI"]
license = "MIT" license = "MIT"
@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"
[tool.commitizen] [tool.commitizen]
version = "1.35.17" version = "1.35.18"
version_files = [ version_files = [
"pyproject.toml:^version" "pyproject.toml:^version"
] ]

View file

@ -14,6 +14,24 @@ sys.path.insert(
import litellm import litellm
async def generate_team(session):
url = "http://0.0.0.0:4000/team/new"
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
data = {
"team_id": "litellm-dashboard",
}
async with session.post(url, headers=headers, json=data) as response:
status = response.status
response_text = await response.text()
print(f"Response (Status code: {status}):")
print(response_text)
print()
_json_response = await response.json()
return _json_response
async def generate_user( async def generate_user(
session, session,
user_role="app_owner", user_role="app_owner",
@ -668,7 +686,7 @@ async def test_key_rate_limit():
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_key_delete(): async def test_key_delete_ui():
""" """
Admin UI flow - DO NOT DELETE Admin UI flow - DO NOT DELETE
-> Create a key with user_id = "ishaan" -> Create a key with user_id = "ishaan"
@ -680,6 +698,8 @@ async def test_key_delete():
key = key_gen["key"] key = key_gen["key"]
# generate a admin UI key # generate a admin UI key
team = await generate_team(session=session)
print("generated team: ", team)
admin_ui_key = await generate_user(session=session, user_role="proxy_admin") admin_ui_key = await generate_user(session=session, user_role="proxy_admin")
print( print(
"trying to delete key=", "trying to delete key=",

View file

@ -260,7 +260,10 @@ async def test_chat_completion_ratelimit():
await asyncio.gather(*tasks) await asyncio.gather(*tasks)
pytest.fail("Expected at least 1 call to fail") pytest.fail("Expected at least 1 call to fail")
except Exception as e: except Exception as e:
pass if "Request did not return a 200 status code: 429" in str(e):
pass
else:
pytest.fail(f"Wrong error received - {str(e)}")
@pytest.mark.asyncio @pytest.mark.asyncio

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[16586,[\"968\",\"static/chunks/968-0cc23fee51b47e4e.js\",\"931\",\"static/chunks/app/page-93ac11fb17dce9d6.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"Oe7aA-U7OV9Y13gspREJQ\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin="" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[65249,[\"968\",\"static/chunks/968-0cc23fee51b47e4e.js\",\"931\",\"static/chunks/app/page-6ba29bc4256320f4.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"Vjlnu8AomhCFg4fkGtcUs\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin 
UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[16586,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-93ac11fb17dce9d6.js"],""] 3:I[65249,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-6ba29bc4256320f4.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["Oe7aA-U7OV9Y13gspREJQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 0:["Vjlnu8AomhCFg4fkGtcUs",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -9,6 +9,7 @@ import Teams from "@/components/teams";
import AdminPanel from "@/components/admins"; import AdminPanel from "@/components/admins";
import Settings from "@/components/settings"; import Settings from "@/components/settings";
import GeneralSettings from "@/components/general_settings"; import GeneralSettings from "@/components/general_settings";
import APIRef from "@/components/api_ref";
import ChatUI from "@/components/chat_ui"; import ChatUI from "@/components/chat_ui";
import Sidebar from "../components/leftnav"; import Sidebar from "../components/leftnav";
import Usage from "../components/usage"; import Usage from "../components/usage";
@ -165,6 +166,8 @@ const CreateKeyPage = () => {
accessToken={accessToken} accessToken={accessToken}
showSSOBanner={showSSOBanner} showSSOBanner={showSSOBanner}
/> />
) : page == "api_ref" ? (
<APIRef/>
) : page == "settings" ? ( ) : page == "settings" ? (
<Settings <Settings
userID={userID} userID={userID}

View file

@ -0,0 +1,152 @@
"use client";
import React, { useEffect, useState } from "react";
import {
Badge,
Card,
Table,
Metric,
TableBody,
TableCell,
TableHead,
TableHeaderCell,
TableRow,
Text,
Title,
Icon,
Accordion,
AccordionBody,
AccordionHeader,
List,
ListItem,
Tab,
TabGroup,
TabList,
TabPanel,
TabPanels,
Grid,
} from "@tremor/react";
import { Statistic } from "antd"
import { modelAvailableCall } from "./networking";
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
const APIRef = ({}) => {
return (
<>
<Grid className="gap-2 p-8 h-[80vh] w-full mt-2">
<div className="mb-5">
<p className="text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">OpenAI Compatible Proxy: API Reference</p>
<Text className="mt-2 mb-2">LiteLLM is OpenAI Compatible. This means your API Key works with the OpenAI SDK. Just replace the base_url to point to your litellm proxy. Example Below </Text>
<TabGroup>
<TabList>
<Tab>OpenAI Python SDK</Tab>
<Tab>LlamaIndex</Tab>
<Tab>Langchain Py</Tab>
</TabList>
<TabPanels>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import openai
client = openai.OpenAI(
api_key="your_api_key",
base_url="http://0.0.0.0:4000" # LiteLLM Proxy is OpenAI compatible, Read More: https://docs.litellm.ai/docs/proxy/user_keys
)
response = client.chat.completions.create(
model="gpt-3.5-turbo", # model to send to the proxy
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
]
)
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import os, dotenv
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
llm = AzureOpenAI(
engine="azure-gpt-3.5", # model_name on litellm proxy
temperature=0.0,
azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint
api_key="sk-1234", # litellm proxy API Key
api_version="2023-07-01-preview",
)
embed_model = AzureOpenAIEmbedding(
deployment_name="azure-embedding-model",
azure_endpoint="http://0.0.0.0:4000",
api_key="sk-1234",
api_version="2023-07-01-preview",
)
documents = SimpleDirectoryReader("llama_index_data").load_data()
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
chat = ChatOpenAI(
openai_api_base="http://0.0.0.0:4000",
model = "gpt-3.5-turbo",
temperature=0.1
)
messages = [
SystemMessage(
content="You are a helpful assistant that im using to make a test request to."
),
HumanMessage(
content="test from litellm. tell me why it's amazing in 1 sentence"
),
]
response = chat(messages)
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
</TabPanels>
</TabGroup>
</div>
</Grid>
</>
)
}
export default APIRef;

View file

@ -13,12 +13,12 @@ import {
TabGroup, TabGroup,
TabList, TabList,
TabPanel, TabPanel,
TabPanels,
Metric, Metric,
Col, Col,
Text, Text,
SelectItem, SelectItem,
TextInput, TextInput,
TabPanels,
Button, Button,
} from "@tremor/react"; } from "@tremor/react";
@ -201,7 +201,6 @@ const ChatUI: React.FC<ChatUIProps> = ({
<TabGroup> <TabGroup>
<TabList> <TabList>
<Tab>Chat</Tab> <Tab>Chat</Tab>
<Tab>API Reference</Tab>
</TabList> </TabList>
<TabPanels> <TabPanels>
@ -272,124 +271,7 @@ const ChatUI: React.FC<ChatUIProps> = ({
</div> </div>
</div> </div>
</TabPanel> </TabPanel>
<TabPanel>
<TabGroup>
<TabList>
<Tab>OpenAI Python SDK</Tab>
<Tab>LlamaIndex</Tab>
<Tab>Langchain Py</Tab>
</TabList>
<TabPanels>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import openai
client = openai.OpenAI(
api_key="your_api_key",
base_url="http://0.0.0.0:4000" # proxy base url
)
response = client.chat.completions.create(
model="gpt-3.5-turbo", # model to use from Models Tab
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
extra_body={
"metadata": {
"generation_name": "ishaan-generation-openai-client",
"generation_id": "openai-client-gen-id22",
"trace_id": "openai-client-trace-id22",
"trace_user_id": "openai-client-user-id2"
}
}
)
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import os, dotenv
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
llm = AzureOpenAI(
engine="azure-gpt-3.5", # model_name on litellm proxy
temperature=0.0,
azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint
api_key="sk-1234", # litellm proxy API Key
api_version="2023-07-01-preview",
)
embed_model = AzureOpenAIEmbedding(
deployment_name="azure-embedding-model",
azure_endpoint="http://0.0.0.0:4000",
api_key="sk-1234",
api_version="2023-07-01-preview",
)
documents = SimpleDirectoryReader("llama_index_data").load_data()
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
chat = ChatOpenAI(
openai_api_base="http://0.0.0.0:8000",
model = "gpt-3.5-turbo",
temperature=0.1,
extra_body={
"metadata": {
"generation_name": "ishaan-generation-langchain-client",
"generation_id": "langchain-client-gen-id22",
"trace_id": "langchain-client-trace-id22",
"trace_user_id": "langchain-client-user-id2"
}
}
)
messages = [
SystemMessage(
content="You are a helpful assistant that im using to make a test request to."
),
HumanMessage(
content="test from litellm. tell me why it's amazing in 1 sentence"
),
]
response = chat(messages)
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
</TabPanels>
</TabGroup>
</TabPanel>
</TabPanels> </TabPanels>
</TabGroup> </TabGroup>
</Card> </Card>

View file

@ -2,7 +2,7 @@
import React, { useState, useEffect, useRef } from "react"; import React, { useState, useEffect, useRef } from "react";
import { Button, TextInput, Grid, Col } from "@tremor/react"; import { Button, TextInput, Grid, Col } from "@tremor/react";
import { Card, Metric, Text, Title, Subtitle } from "@tremor/react"; import { Card, Metric, Text, Title, Subtitle, Accordion, AccordionHeader, AccordionBody, } from "@tremor/react";
import { CopyToClipboard } from 'react-copy-to-clipboard'; import { CopyToClipboard } from 'react-copy-to-clipboard';
import { import {
Button as Button2, Button as Button2,
@ -147,6 +147,17 @@ const CreateKey: React.FC<CreateKeyProps> = ({
mode="multiple" mode="multiple"
placeholder="Select models" placeholder="Select models"
style={{ width: "100%" }} style={{ width: "100%" }}
onChange={(values) => {
// Check if "All Team Models" is selected
const isAllTeamModelsSelected = values.includes("all-team-models");
// If "All Team Models" is selected, deselect all other models
if (isAllTeamModelsSelected) {
const newValues = ["all-team-models"];
// You can call the form's setFieldsValue method to update the value
form.setFieldsValue({ models: newValues });
}
}}
> >
<Option key="all-team-models" value="all-team-models"> <Option key="all-team-models" value="all-team-models">
All Team Models All Team Models
@ -248,16 +259,153 @@ const CreateKey: React.FC<CreateKeyProps> = ({
</> </>
) : ( ) : (
<> <>
<Form.Item label="Key Name" name="key_alias"> <Form.Item
label="Key Name"
name="key_alias"
rules={[{ required: true, message: 'Please input a key name' }]}
help="required"
>
<Input /> <Input />
</Form.Item> </Form.Item>
<Form.Item label="Team ID (Contact Group)" name="team_id"> <Form.Item
<Input placeholder="default team (create a new team)" /> label="Team ID"
name="team_id"
hidden={true}
initialValue={team ? team["team_id"] : null}
valuePropName="team_id"
className="mt-8"
>
<Input value={team ? team["team_alias"] : ""} disabled />
</Form.Item> </Form.Item>
<Form.Item label="Description" name="description"> <Form.Item
<Input.TextArea placeholder="Enter description" rows={4} /> label="Models"
name="models"
className="mb-12"
rules={[{ required: true, message: 'Please select a model' }]}
help="required"
>
<Select
mode="multiple"
placeholder="Select models"
style={{ width: "100%" }}
onChange={(values) => {
const isAllTeamModelsSelected = values.includes("all-team-models");
if (isAllTeamModelsSelected) {
const newValues = ["all-team-models"];
form.setFieldsValue({ models: newValues });
}
}}
>
<Option key="all-team-models" value="all-team-models">
All Team Models
</Option>
{team && team.models ? (
team.models.includes("all-proxy-models") ? (
userModels.map((model: string) => (
(
<Option key={model} value={model}>
{model}
</Option>
)
))
) : (
team.models.map((model: string) => (
<Option key={model} value={model}>
{model}
</Option>
))
)
) : (
userModels.map((model: string) => (
<Option key={model} value={model}>
{model}
</Option>
))
)}
</Select>
</Form.Item> </Form.Item>
<Accordion className="mt-20 mb-8" >
<AccordionHeader>
<b>Optional Settings</b>
</AccordionHeader>
<AccordionBody>
<Form.Item
className="mt-8"
label="Max Budget (USD)"
name="max_budget"
help={`Budget cannot exceed team max budget: $${team?.max_budget !== null && team?.max_budget !== undefined ? team?.max_budget : 'unlimited'}`}
rules={[
{
validator: async (_, value) => {
if (value && team && team.max_budget !== null && value > team.max_budget) {
throw new Error(`Budget cannot exceed team max budget: $${team.max_budget}`);
}
},
},
]}
>
<InputNumber step={0.01} precision={2} width={200} />
</Form.Item>
<Form.Item
className="mt-8"
label="Reset Budget"
name="budget_duration"
help={`Team Reset Budget: ${team?.budget_duration !== null && team?.budget_duration !== undefined ? team?.budget_duration : 'None'}`}
>
<Select defaultValue={null} placeholder="n/a">
<Select.Option value="24h">daily</Select.Option>
<Select.Option value="30d">monthly</Select.Option>
</Select>
</Form.Item>
<Form.Item
className="mt-8"
label="Tokens per minute Limit (TPM)"
name="tpm_limit"
help={`TPM cannot exceed team TPM limit: ${team?.tpm_limit !== null && team?.tpm_limit !== undefined ? team?.tpm_limit : 'unlimited'}`}
rules={[
{
validator: async (_, value) => {
if (value && team && team.tpm_limit !== null && value > team.tpm_limit) {
throw new Error(`TPM limit cannot exceed team TPM limit: ${team.tpm_limit}`);
}
},
},
]}
>
<InputNumber step={1} width={400} />
</Form.Item>
<Form.Item
className="mt-8"
label="Requests per minute Limit (RPM)"
name="rpm_limit"
help={`RPM cannot exceed team RPM limit: ${team?.rpm_limit !== null && team?.rpm_limit !== undefined ? team?.rpm_limit : 'unlimited'}`}
rules={[
{
validator: async (_, value) => {
if (value && team && team.rpm_limit !== null && value > team.rpm_limit) {
throw new Error(`RPM limit cannot exceed team RPM limit: ${team.rpm_limit}`);
}
},
},
]}
>
<InputNumber step={1} width={400} />
</Form.Item>
<Form.Item label="Expire Key (eg: 30s, 30h, 30d)" name="duration" className="mt-8">
<Input />
</Form.Item>
<Form.Item label="Metadata" name="metadata">
<Input.TextArea rows={4} placeholder="Enter metadata as JSON" />
</Form.Item>
</AccordionBody>
</Accordion>
</> </>
)} )}
<div style={{ textAlign: "right", marginTop: "10px" }}> <div style={{ textAlign: "right", marginTop: "10px" }}>

View file

@ -4,6 +4,7 @@ import { Select, SelectItem, Text, Title } from "@tremor/react";
interface DashboardTeamProps { interface DashboardTeamProps {
teams: Object[] | null; teams: Object[] | null;
setSelectedTeam: React.Dispatch<React.SetStateAction<any | null>>; setSelectedTeam: React.Dispatch<React.SetStateAction<any | null>>;
userRole: string | null;
} }
type TeamInterface = { type TeamInterface = {
@ -15,6 +16,7 @@ type TeamInterface = {
const DashboardTeam: React.FC<DashboardTeamProps> = ({ const DashboardTeam: React.FC<DashboardTeamProps> = ({
teams, teams,
setSelectedTeam, setSelectedTeam,
userRole,
}) => { }) => {
const defaultTeam: TeamInterface = { const defaultTeam: TeamInterface = {
models: [], models: [],
@ -25,19 +27,26 @@ const DashboardTeam: React.FC<DashboardTeamProps> = ({
const [value, setValue] = useState(defaultTeam); const [value, setValue] = useState(defaultTeam);
const updatedTeams = teams ? [...teams, defaultTeam] : [defaultTeam]; let updatedTeams;
if (userRole === "App User") {
// Non-Admin SSO users should only see their own team - they should not see "Default Team"
updatedTeams = teams;
} else {
updatedTeams = teams ? [...teams, defaultTeam] : [defaultTeam];
}
if (userRole === 'App User') return null;
return ( return (
<div className="mt-5 mb-5"> <div className="mt-5 mb-5">
<Title>Select Team</Title> <Title>Select Team</Title>
<Text> <Text>
If you belong to multiple teams, this setting controls which team is If you belong to multiple teams, this setting controls which team is used by default when creating new API Keys.
used by default when creating new API Keys.
</Text> </Text>
<Text className="mt-3 mb-3"> <Text className="mt-3 mb-3">
<b>Default Team:</b> If no team_id is set for a key, it will be grouped under here. <b>Default Team:</b> If no team_id is set for a key, it will be grouped under here.
</Text> </Text>
{updatedTeams && updatedTeams.length > 0 ? ( {updatedTeams && updatedTeams.length > 0 ? (
<Select defaultValue="0"> <Select defaultValue="0">
{updatedTeams.map((team: any, index) => ( {updatedTeams.map((team: any, index) => (

View file

@ -46,8 +46,8 @@ const Sidebar: React.FC<SidebarProps> = ({
); );
} }
return ( return (
<Layout style={{ minHeight: "100vh", maxWidth: "120px" }}> <Layout style={{ minHeight: "100vh", maxWidth: "130px" }}>
<Sider width={120}> <Sider width={130}>
<Menu <Menu
mode="inline" mode="inline"
defaultSelectedKeys={defaultSelectedKey ? defaultSelectedKey : ["1"]} defaultSelectedKeys={defaultSelectedKey ? defaultSelectedKey : ["1"]}
@ -63,11 +63,23 @@ const Sidebar: React.FC<SidebarProps> = ({
Test Key Test Key
</Text> </Text>
</Menu.Item> </Menu.Item>
<Menu.Item key="2" onClick={() => setPage("models")}>
<Text> <Menu.Item key="11" onClick={() => setPage("api_ref")}>
Models <Text>
</Text> API Reference
</Menu.Item> </Text>
</Menu.Item>
{
userRole == "Admin" ? (
<Menu.Item key="2" onClick={() => setPage("models")}>
<Text>
Models
</Text>
</Menu.Item>
) : null
}
{userRole == "Admin" ? ( {userRole == "Admin" ? (
<Menu.Item key="6" onClick={() => setPage("teams")}> <Menu.Item key="6" onClick={() => setPage("teams")}>
<Text> <Text>
@ -75,11 +87,18 @@ const Sidebar: React.FC<SidebarProps> = ({
</Text> </Text>
</Menu.Item> </Menu.Item>
) : null} ) : null}
<Menu.Item key="4" onClick={() => setPage("usage")}>
<Text> {
Usage userRole == "Admin" ? (
</Text> <Menu.Item key="4" onClick={() => setPage("usage")}>
</Menu.Item> <Text>
Usage
</Text>
</Menu.Item>
) : null
}
{userRole == "Admin" ? ( {userRole == "Admin" ? (
<Menu.Item key="5" onClick={() => setPage("users")}> <Menu.Item key="5" onClick={() => setPage("users")}>
<Text> <Text>
@ -87,16 +106,27 @@ const Sidebar: React.FC<SidebarProps> = ({
</Text> </Text>
</Menu.Item> </Menu.Item>
) : null} ) : null}
<Menu.Item key="8" onClick={() => setPage("settings")}>
<Text> {
Integrations userRole == "Admin" ? (
</Text> <Menu.Item key="8" onClick={() => setPage("settings")}>
</Menu.Item> <Text>
<Menu.Item key="9" onClick={() => setPage("general-settings")}> Integrations
<Text> </Text>
Settings </Menu.Item>
</Text> ) : null
</Menu.Item> }
{
userRole == "Admin" ? (
<Menu.Item key="9" onClick={() => setPage("general-settings")}>
<Text>
Settings
</Text>
</Menu.Item>
) : null
}
{userRole == "Admin" ? ( {userRole == "Admin" ? (
<Menu.Item key="7" onClick={() => setPage("admin-panel")}> <Menu.Item key="7" onClick={() => setPage("admin-panel")}>
<Text> <Text>

View file

@ -296,6 +296,9 @@ export const userInfoCall = async (
if (userRole == "App Owner" && userID) { if (userRole == "App Owner" && userID) {
url = `${url}?user_id=${userID}`; url = `${url}?user_id=${userID}`;
} }
if (userRole == "App User" && userID) {
url = `${url}?user_id=${userID}`;
}
console.log("in userInfoCall viewAll=", viewAll); console.log("in userInfoCall viewAll=", viewAll);
if (viewAll && page_size && (page != null) && (page != undefined)) { if (viewAll && page_size && (page != null) && (page != undefined)) {
url = `${url}?view_all=true&page=${page}&page_size=${page_size}`; url = `${url}?view_all=true&page=${page}&page_size=${page_size}`;

View file

@ -5,6 +5,7 @@ import { Grid, Col, Card, Text, Title } from "@tremor/react";
import CreateKey from "./create_key_button"; import CreateKey from "./create_key_button";
import ViewKeyTable from "./view_key_table"; import ViewKeyTable from "./view_key_table";
import ViewUserSpend from "./view_user_spend"; import ViewUserSpend from "./view_user_spend";
import ViewUserTeam from "./view_user_team";
import DashboardTeam from "./dashboard_default_team"; import DashboardTeam from "./dashboard_default_team";
import { useSearchParams, useRouter } from "next/navigation"; import { useSearchParams, useRouter } from "next/navigation";
import { jwtDecode } from "jwt-decode"; import { jwtDecode } from "jwt-decode";
@ -232,11 +233,19 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
<div className="w-full mx-4"> <div className="w-full mx-4">
<Grid numItems={1} className="gap-2 p-8 h-[75vh] w-full mt-2"> <Grid numItems={1} className="gap-2 p-8 h-[75vh] w-full mt-2">
<Col numColSpan={1}> <Col numColSpan={1}>
<ViewUserTeam
userID={userID}
userRole={userRole}
selectedTeam={selectedTeam ? selectedTeam : null}
accessToken={accessToken}
/>
<ViewUserSpend <ViewUserSpend
userID={userID} userID={userID}
userRole={userRole} userRole={userRole}
accessToken={accessToken} accessToken={accessToken}
userSpend={teamSpend} userSpend={teamSpend}
selectedTeam = {selectedTeam ? selectedTeam : null}
/> />
<ViewKeyTable <ViewKeyTable
@ -257,7 +266,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
data={keys} data={keys}
setData={setKeys} setData={setKeys}
/> />
<DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} /> <DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} userRole={userRole}/>
</Col> </Col>
</Grid> </Grid>
</div> </div>

View file

@ -2,7 +2,7 @@
import React, { useEffect, useState } from "react"; import React, { useEffect, useState } from "react";
import { keyDeleteCall, getTotalSpendCall } from "./networking"; import { keyDeleteCall, getTotalSpendCall } from "./networking";
import { StatusOnlineIcon, TrashIcon } from "@heroicons/react/outline"; import { StatusOnlineIcon, TrashIcon } from "@heroicons/react/outline";
import { DonutChart } from "@tremor/react"; import { Accordion, AccordionHeader, AccordionList, DonutChart } from "@tremor/react";
import { import {
Badge, Badge,
Card, Card,
@ -16,9 +16,13 @@ import {
Text, Text,
Title, Title,
Icon, Icon,
AccordionBody,
List,
ListItem,
} from "@tremor/react"; } from "@tremor/react";
import { Statistic } from "antd" import { Statistic } from "antd"
import { spendUsersCall } from "./networking"; import { spendUsersCall, modelAvailableCall } from "./networking";
// Define the props type // Define the props type
@ -32,11 +36,13 @@ interface ViewUserSpendProps {
userRole: string | null; userRole: string | null;
accessToken: string | null; accessToken: string | null;
userSpend: number | null; userSpend: number | null;
selectedTeam: any | null;
} }
const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessToken, userSpend }) => { const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessToken, userSpend, selectedTeam }) => {
console.log(`userSpend: ${userSpend}`) console.log(`userSpend: ${userSpend}`)
let [spend, setSpend] = useState(userSpend !== null ? userSpend : 0.0); let [spend, setSpend] = useState(userSpend !== null ? userSpend : 0.0);
const [maxBudget, setMaxBudget] = useState(0.0); const [maxBudget, setMaxBudget] = useState(0.0);
const [userModels, setUserModels] = useState([]);
useEffect(() => { useEffect(() => {
const fetchData = async () => { const fetchData = async () => {
if (!accessToken || !userID || !userRole) { if (!accessToken || !userID || !userRole) {
@ -62,9 +68,30 @@ const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessT
} }
} }
}; };
const fetchUserModels = async () => {
try {
if (userID === null || userRole === null) {
return;
}
if (accessToken !== null) {
const model_available = await modelAvailableCall(accessToken, userID, userRole);
let available_model_names = model_available["data"].map(
(element: { id: string }) => element.id
);
console.log("available_model_names:", available_model_names);
setUserModels(available_model_names);
}
} catch (error) {
console.error("Error fetching user models:", error);
}
};
fetchUserModels();
fetchData(); fetchData();
}, [userRole, accessToken]); }, [userRole, accessToken, userID]);
useEffect(() => { useEffect(() => {
if (userSpend !== null) { if (userSpend !== null) {
@ -72,18 +99,50 @@ const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessT
} }
}, [userSpend]) }, [userSpend])
// logic to decide what models to display
let modelsToDisplay = [];
if (selectedTeam && selectedTeam.models) {
modelsToDisplay = selectedTeam.models;
}
// check if "all-proxy-models" is in modelsToDisplay
if (modelsToDisplay && modelsToDisplay.includes("all-proxy-models")) {
console.log("user models:", userModels);
modelsToDisplay = userModels;
}
const displayMaxBudget = maxBudget !== null ? `$${maxBudget} limit` : "No limit"; const displayMaxBudget = maxBudget !== null ? `$${maxBudget} limit` : "No limit";
const roundedSpend = spend !== undefined ? spend.toFixed(4) : null; const roundedSpend = spend !== undefined ? spend.toFixed(4) : null;
console.log(`spend in view user spend: ${spend}`) console.log(`spend in view user spend: ${spend}`)
return ( return (
<> <div className="flex items-center">
<p className="text-tremor-default text-tremor-content dark:text-dark-tremor-content">Total Spend </p> <div>
<p className="text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">${roundedSpend}</p> <p className="text-tremor-default text-tremor-content dark:text-dark-tremor-content">
Total Spend{" "}
</> </p>
) <p className="text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">
${roundedSpend}
</p>
</div>
<div className="ml-auto">
<Accordion>
<AccordionHeader>Models</AccordionHeader>
<AccordionBody className="absolute right-0 z-10 bg-white p-2 shadow-lg max-w-xs">
<List>
{modelsToDisplay.map((model: string) => (
<ListItem key={model}>
<Text>{model}</Text>
</ListItem>
))}
</List>
</AccordionBody>
</Accordion>
</div>
</div>
);
} }
export default ViewUserSpend; export default ViewUserSpend;

View file

@ -0,0 +1,78 @@
"use client";
import React, { useEffect, useState } from "react";
import {
Badge,
Card,
Table,
Metric,
TableBody,
TableCell,
TableHead,
TableHeaderCell,
TableRow,
Text,
Title,
Icon,
Accordion,
AccordionBody,
AccordionHeader,
List,
ListItem,
} from "@tremor/react";
import { Statistic } from "antd"
import { modelAvailableCall } from "./networking";
interface ViewUserTeamProps {
userID: string | null;
userRole: string | null;
selectedTeam: any | null;
accessToken: string | null;
}
const ViewUserTeam: React.FC<ViewUserTeamProps> = ({ userID, userRole, selectedTeam, accessToken}) => {
const [userModels, setUserModels] = useState([]);
useEffect(() => {
const fetchUserModels = async () => {
try {
if (userID === null || userRole === null) {
return;
}
if (accessToken !== null) {
const model_available = await modelAvailableCall(accessToken, userID, userRole);
let available_model_names = model_available["data"].map(
(element: { id: string }) => element.id
);
console.log("available_model_names:", available_model_names);
setUserModels(available_model_names);
}
} catch (error) {
console.error("Error fetching user models:", error);
}
};
fetchUserModels();
}, [accessToken, userID, userRole]);
// logic to decide what models to display
let modelsToDisplay = [];
if (selectedTeam && selectedTeam.models) {
modelsToDisplay = selectedTeam.models;
}
// check if "all-proxy-models" is in modelsToDisplay
if (modelsToDisplay && modelsToDisplay.includes("all-proxy-models")) {
console.log("user models:", userModels);
modelsToDisplay = userModels;
}
return (
<>
<div className="mb-5">
<p className="text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">{selectedTeam?.team_alias}</p>
</div>
</>
)
}
export default ViewUserTeam;