Merge branch 'BerriAI:main' into feature/watsonx-integration

Simon S. Viloria 2024-04-21 10:35:51 +02:00 committed by GitHub
commit a77537ddd4
45 changed files with 1027 additions and 281 deletions

View file

@ -279,7 +279,7 @@ router_settings:
```
</TabItem>
<TabItem value="simple-shuffle" label="(Default) Weighted Pick">
<TabItem value="simple-shuffle" label="(Default) Weighted Pick (Async)">
**Default** Picks a deployment based on the provided **Requests per minute (rpm) or Tokens per minute (tpm)**
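A minimal sketch of the weighting, assuming two deployments with illustrative rpm values (not taken from any real config):

```python
# Illustrative sketch only: rpm-weighted random pick across deployments.
import random

deployments = [
    {"model_name": "gpt-3.5-turbo", "litellm_params": {"rpm": 900}},
    {"model_name": "gpt-3.5-turbo", "litellm_params": {"rpm": 100}},
]
rpms = [d["litellm_params"].get("rpm", 0) for d in deployments]
weights = [r / sum(rpms) for r in rpms]  # [0.9, 0.1]
picked = random.choices(deployments, weights=weights)[0]  # ~90% of picks hit the first deployment
```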

View file

@ -19,7 +19,7 @@ class PrometheusLogger:
**kwargs,
):
try:
verbose_logger.debug(f"in init prometheus metrics")
print(f"in init prometheus metrics")
from prometheus_client import Counter
self.litellm_llm_api_failed_requests_metric = Counter(

View file

@ -44,9 +44,18 @@ class PrometheusServicesLogger:
) # store the prometheus histogram/counter we need to call for each field in payload
for service in self.services:
histogram = self.create_histogram(service)
counter = self.create_counter(service)
self.payload_to_prometheus_map[service] = [histogram, counter]
histogram = self.create_histogram(service, type_of_request="latency")
counter_failed_request = self.create_counter(
service, type_of_request="failed_requests"
)
counter_total_requests = self.create_counter(
service, type_of_request="total_requests"
)
self.payload_to_prometheus_map[service] = [
histogram,
counter_failed_request,
counter_total_requests,
]
self.prometheus_to_amount_map: dict = (
{}
@ -74,26 +83,26 @@ class PrometheusServicesLogger:
return metric
return None
def create_histogram(self, label: str):
metric_name = "litellm_{}_latency".format(label)
def create_histogram(self, service: str, type_of_request: str):
metric_name = "litellm_{}_{}".format(service, type_of_request)
is_registered = self.is_metric_registered(metric_name)
if is_registered:
return self.get_metric(metric_name)
return self.Histogram(
metric_name,
"Latency for {} service".format(label),
labelnames=[label],
"Latency for {} service".format(service),
labelnames=[service],
)
def create_counter(self, label: str):
metric_name = "litellm_{}_failed_requests".format(label)
def create_counter(self, service: str, type_of_request: str):
metric_name = "litellm_{}_{}".format(service, type_of_request)
is_registered = self.is_metric_registered(metric_name)
if is_registered:
return self.get_metric(metric_name)
return self.Counter(
metric_name,
"Total failed requests for {} service".format(label),
labelnames=[label],
"Total {} for {} service".format(type_of_request, service),
labelnames=[service],
)
def observe_histogram(
@ -120,6 +129,8 @@ class PrometheusServicesLogger:
if self.mock_testing:
self.mock_testing_success_calls += 1
print(f"payload call type: {payload.call_type}")
if payload.service.value in self.payload_to_prometheus_map:
prom_objects = self.payload_to_prometheus_map[payload.service.value]
for obj in prom_objects:
@ -129,11 +140,19 @@ class PrometheusServicesLogger:
labels=payload.service.value,
amount=payload.duration,
)
elif isinstance(obj, self.Counter) and "total_requests" in obj._name:
self.increment_counter(
counter=obj,
labels=payload.service.value,
amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS
)
def service_failure_hook(self, payload: ServiceLoggerPayload):
if self.mock_testing:
self.mock_testing_failure_calls += 1
print(f"payload call type: {payload.call_type}")
if payload.service.value in self.payload_to_prometheus_map:
prom_objects = self.payload_to_prometheus_map[payload.service.value]
for obj in prom_objects:
@ -141,7 +160,7 @@ class PrometheusServicesLogger:
self.increment_counter(
counter=obj,
labels=payload.service.value,
amount=1, # LOG ERROR COUNT TO PROMETHEUS
amount=1, # LOG ERROR COUNT / TOTAL REQUESTS TO PROMETHEUS
)
async def async_service_success_hook(self, payload: ServiceLoggerPayload):
@ -151,6 +170,8 @@ class PrometheusServicesLogger:
if self.mock_testing:
self.mock_testing_success_calls += 1
print(f"payload call type: {payload.call_type}")
if payload.service.value in self.payload_to_prometheus_map:
prom_objects = self.payload_to_prometheus_map[payload.service.value]
for obj in prom_objects:
@ -160,12 +181,20 @@ class PrometheusServicesLogger:
labels=payload.service.value,
amount=payload.duration,
)
elif isinstance(obj, self.Counter) and "total_requests" in obj._name:
self.increment_counter(
counter=obj,
labels=payload.service.value,
amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS
)
async def async_service_failure_hook(self, payload: ServiceLoggerPayload):
print(f"received error payload: {payload.error}")
if self.mock_testing:
self.mock_testing_failure_calls += 1
print(f"payload call type: {payload.call_type}")
if payload.service.value in self.payload_to_prometheus_map:
prom_objects = self.payload_to_prometheus_map[payload.service.value]
for obj in prom_objects:
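For reference, a minimal sketch of the metric names the code above now registers per service; the service value "redis" is assumed for illustration:

```python
# Illustrative only: each service gets one latency histogram and two counters,
# named litellm_{service}_{type_of_request} per create_histogram / create_counter above.
service = "redis"  # assumed service value
for type_of_request in ("latency", "failed_requests", "total_requests"):
    print("litellm_{}_{}".format(service, type_of_request))
# litellm_redis_latency, litellm_redis_failed_requests, litellm_redis_total_requests
```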

View file

@ -507,10 +507,11 @@ def construct_tool_use_system_prompt(
): # from https://github.com/anthropics/anthropic-cookbook/blob/main/function_calling/function_calling.ipynb
tool_str_list = []
for tool in tools:
tool_function = get_attribute_or_key(tool, "function")
tool_str = construct_format_tool_for_claude_prompt(
tool["function"]["name"],
tool["function"].get("description", ""),
tool["function"].get("parameters", {}),
get_attribute_or_key(tool_function, "name"),
get_attribute_or_key(tool_function, "description", ""),
get_attribute_or_key(tool_function, "parameters", {}),
)
tool_str_list.append(tool_str)
tool_use_system_prompt = (
@ -634,7 +635,8 @@ def convert_to_anthropic_tool_result_xml(message: dict) -> str:
</function_results>
"""
name = message.get("name")
content = message.get("content")
content = message.get("content", "")
content = content.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
# We can't determine from openai message format whether it's a successful or
# error call result so default to the successful result template
@ -655,13 +657,15 @@ def convert_to_anthropic_tool_result_xml(message: dict) -> str:
def convert_to_anthropic_tool_invoke_xml(tool_calls: list) -> str:
invokes = ""
for tool in tool_calls:
if tool["type"] != "function":
if get_attribute_or_key(tool, "type") != "function":
continue
tool_name = tool["function"]["name"]
tool_function = get_attribute_or_key(tool, "function")
tool_name = get_attribute_or_key(tool_function, "name")
tool_arguments = get_attribute_or_key(tool_function, "arguments")
parameters = "".join(
f"<{param}>{val}</{param}>\n"
for param, val in json.loads(tool["function"]["arguments"]).items()
for param, val in json.loads(tool_arguments).items()
)
invokes += (
"<invoke>\n"
@ -715,7 +719,7 @@ def anthropic_messages_pt_xml(messages: list):
{
"type": "text",
"text": (
convert_to_anthropic_tool_result(messages[msg_i])
convert_to_anthropic_tool_result_xml(messages[msg_i])
if messages[msg_i]["role"] == "tool"
else messages[msg_i]["content"]
),
@ -736,7 +740,7 @@ def anthropic_messages_pt_xml(messages: list):
if messages[msg_i].get(
"tool_calls", []
): # support assistant tool invoke conversion
assistant_text += convert_to_anthropic_tool_invoke( # type: ignore
assistant_text += convert_to_anthropic_tool_invoke_xml( # type: ignore
messages[msg_i]["tool_calls"]
)
@ -848,12 +852,12 @@ def convert_to_anthropic_tool_invoke(tool_calls: list) -> list:
anthropic_tool_invoke = [
{
"type": "tool_use",
"id": tool["id"],
"name": tool["function"]["name"],
"input": json.loads(tool["function"]["arguments"]),
"id": get_attribute_or_key(tool, "id"),
"name": get_attribute_or_key(get_attribute_or_key(tool, "function"), "name"),
"input": json.loads(get_attribute_or_key(get_attribute_or_key(tool, "function"), "arguments")),
}
for tool in tool_calls
if tool["type"] == "function"
if get_attribute_or_key(tool, "type") == "function"
]
return anthropic_tool_invoke
@ -1074,7 +1078,8 @@ def cohere_message_pt(messages: list):
tool_result = convert_openai_message_to_cohere_tool_result(message)
tool_results.append(tool_result)
else:
prompt += message["content"]
prompt += message["content"] + "\n\n"
prompt = prompt.rstrip()
return prompt, tool_results
@ -1414,3 +1419,8 @@ def prompt_factory(
return default_pt(
messages=messages
) # default that covers Bloom, T-5, any non-chat tuned model (e.g. base Llama2)
def get_attribute_or_key(tool_or_function, attribute, default=None):
if hasattr(tool_or_function, attribute):
return getattr(tool_or_function, attribute)
return tool_or_function.get(attribute, default)
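A minimal usage sketch for the helper above, assuming dict-style and object-style tool calls (the object form is simulated with SimpleNamespace):

```python
# Illustrative only: the helper reads a field from either a dict or an attribute-style object.
from types import SimpleNamespace

dict_tool = {"type": "function", "function": {"name": "get_weather", "arguments": "{}"}}
obj_tool = SimpleNamespace(type="function", function=SimpleNamespace(name="get_weather", arguments="{}"))

assert get_attribute_or_key(dict_tool, "type") == "function"
assert get_attribute_or_key(obj_tool, "type") == "function"
assert get_attribute_or_key(dict_tool["function"], "description", "") == ""  # default when missing
```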

View file

@ -236,17 +236,19 @@ def completion(
if client is None:
if vertex_credentials is not None and isinstance(vertex_credentials, str):
import google.oauth2.service_account
json_obj = json.loads(vertex_credentials)
creds = (
google.oauth2.service_account.Credentials.from_service_account_info(
json_obj,
json.loads(vertex_credentials),
scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
)
### CHECK IF ACCESS
access_token = refresh_auth(credentials=creds)
else:
import google.auth
creds, _ = google.auth.default()
### CHECK IF ACCESS
access_token = refresh_auth(credentials=creds)
vertex_ai_client = AnthropicVertex(
project_id=vertex_project,

View file

@ -610,6 +610,7 @@ def completion(
"client",
"rpm",
"tpm",
"max_parallel_requests",
"input_cost_per_token",
"output_cost_per_token",
"input_cost_per_second",
@ -2598,6 +2599,7 @@ def embedding(
client = kwargs.pop("client", None)
rpm = kwargs.pop("rpm", None)
tpm = kwargs.pop("tpm", None)
max_parallel_requests = kwargs.pop("max_parallel_requests", None)
model_info = kwargs.get("model_info", None)
metadata = kwargs.get("metadata", None)
encoding_format = kwargs.get("encoding_format", None)
@ -2655,6 +2657,7 @@ def embedding(
"client",
"rpm",
"tpm",
"max_parallel_requests",
"input_cost_per_token",
"output_cost_per_token",
"input_cost_per_second",
@ -3514,6 +3517,7 @@ def image_generation(
"client",
"rpm",
"tpm",
"max_parallel_requests",
"input_cost_per_token",
"output_cost_per_token",
"hf_model_name",

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[16586,[\"968\",\"static/chunks/968-0cc23fee51b47e4e.js\",\"931\",\"static/chunks/app/page-93ac11fb17dce9d6.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"Oe7aA-U7OV9Y13gspREJQ\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[65249,[\"968\",\"static/chunks/968-0cc23fee51b47e4e.js\",\"931\",\"static/chunks/app/page-6ba29bc4256320f4.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"Vjlnu8AomhCFg4fkGtcUs\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[16586,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-93ac11fb17dce9d6.js"],""]
3:I[65249,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-6ba29bc4256320f4.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["Oe7aA-U7OV9Y13gspREJQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["Vjlnu8AomhCFg4fkGtcUs",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -4,14 +4,12 @@ model_list:
model: openai/my-fake-model
api_key: my-fake-key
api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
# api_base: http://0.0.0.0:8080
stream_timeout: 0.001
- model_name: fake-openai-endpoint
litellm_params:
model: openai/my-fake-model-2
api_key: my-fake-key
api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
# api_base: http://0.0.0.0:8080
stream_timeout: 0.001
- litellm_params:
model: azure/chatgpt-v-2
@ -30,13 +28,6 @@ model_list:
# api_key: my-fake-key
# api_base: https://exampleopenaiendpoint-production.up.railway.app/
# litellm_settings:
# success_callback: ["prometheus"]
# failure_callback: ["prometheus"]
# service_callback: ["prometheus_system"]
# upperbound_key_generate_params:
# max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET
router_settings:
routing_strategy: usage-based-routing-v2
# redis_url: "os.environ/REDIS_URL"
@ -48,6 +39,10 @@ router_settings:
litellm_settings:
num_retries: 3 # retry call 3 times on each model_name
allowed_fails: 3 # cooldown model if it fails > 1 call in a minute.
success_callback: ["prometheus"]
failure_callback: ["prometheus"]
service_callback: ["prometheus_system"]
general_settings:
alerting: ["slack"]

View file

@ -87,6 +87,14 @@ class LiteLLMRoutes(enum.Enum):
"/v2/key/info",
]
sso_only_routes: List = [
"/key/generate",
"/key/update",
"/key/delete",
"/global/spend/logs",
"/global/predict/spend/logs",
]
management_routes: List = [ # key
"/key/generate",
"/key/update",

View file

@ -1053,6 +1053,11 @@ async def user_api_key_auth(
status_code=status.HTTP_403_FORBIDDEN,
detail="key not allowed to access this team's info",
)
elif (
_has_user_setup_sso()
and route in LiteLLMRoutes.sso_only_routes.value
):
pass
else:
raise Exception(
f"Only master key can be used to generate, delete, update info for new keys/users/teams. Route={route}"
@ -1102,6 +1107,13 @@ async def user_api_key_auth(
return UserAPIKeyAuth(
api_key=api_key, user_role="proxy_admin", **valid_token_dict
)
elif (
_has_user_setup_sso()
and route in LiteLLMRoutes.sso_only_routes.value
):
return UserAPIKeyAuth(
api_key=api_key, user_role="app_owner", **valid_token_dict
)
else:
raise Exception(
f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"
@ -5721,6 +5733,20 @@ async def new_user(data: NewUserRequest):
"user" # only create a user, don't create key if 'auto_create_key' set to False
)
response = await generate_key_helper_fn(**data_json)
# Admin UI Logic
# if team_id passed add this user to the team
if data_json.get("team_id", None) is not None:
await team_member_add(
data=TeamMemberAddRequest(
team_id=data_json.get("team_id", None),
member=Member(
user_id=data_json.get("user_id", None),
role="user",
user_email=data_json.get("user_email", None),
),
)
)
return NewUserResponse(
key=response.get("token", ""),
expires=response.get("expires", None),
@ -6526,13 +6552,20 @@ async def team_member_add(
existing_team_row = await prisma_client.get_data( # type: ignore
team_id=data.team_id, table_name="team", query_type="find_unique"
)
if existing_team_row is None:
raise HTTPException(
status_code=404,
detail={
"error": f"Team not found for team_id={getattr(data, 'team_id', None)}"
},
)
new_member = data.member
existing_team_row.members_with_roles.append(new_member)
complete_team_data = LiteLLM_TeamTable(
**existing_team_row.model_dump(),
**_get_pydantic_json_dict(existing_team_row),
)
team_row = await prisma_client.update_data(
@ -8120,7 +8153,6 @@ async def auth_callback(request: Request):
}
user_role = getattr(user_info, "user_role", None)
else:
## check if user-email in db ##
user_info = await prisma_client.db.litellm_usertable.find_first(
where={"user_email": user_email}
@ -8142,9 +8174,7 @@ async def auth_callback(request: Request):
litellm.default_user_params, dict
):
user_defined_values = {
"models": litellm.default_user_params.get(
"models", user_id_models
),
"models": litellm.default_user_params.get("models", user_id_models),
"user_id": litellm.default_user_params.get("user_id", user_id),
"user_email": litellm.default_user_params.get(
"user_email", user_email

View file

@ -238,7 +238,10 @@ class ProxyLogging:
litellm_params = kwargs.get("litellm_params", {})
model = kwargs.get("model", "")
api_base = litellm.get_api_base(model=model, optional_params=litellm_params)
messages = kwargs.get("messages", "")
messages = kwargs.get("messages", None)
# if messages does not exist fallback to "input"
if messages is None:
messages = kwargs.get("input", None)
# only use first 100 chars for alerting
_messages = str(messages)[:100]
@ -282,7 +285,10 @@ class ProxyLogging:
):
if request_data is not None:
model = request_data.get("model", "")
messages = request_data.get("messages", "")
messages = request_data.get("messages", None)
if messages is None:
# if messages does not exist fallback to "input"
messages = request_data.get("input", None)
trace_id = request_data.get("metadata", {}).get(
"trace_id", None
) # get langfuse trace id
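A minimal sketch of the fallback above, assuming an embedding-style request body that carries "input" but no "messages":

```python
# Illustrative only: alerting falls back to "input" when "messages" is absent.
request_data = {"model": "text-embedding-ada-002", "input": ["hello world"]}  # assumed payload
messages = request_data.get("messages", None)
if messages is None:
    # if messages does not exist fallback to "input"
    messages = request_data.get("input", None)
_messages = str(messages)[:100]  # only the first 100 chars are used for the alert
```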

View file

@ -26,7 +26,12 @@ from litellm.llms.custom_httpx.azure_dall_e_2 import (
CustomHTTPTransport,
AsyncCustomHTTPTransport,
)
from litellm.utils import ModelResponse, CustomStreamWrapper, get_utc_datetime
from litellm.utils import (
ModelResponse,
CustomStreamWrapper,
get_utc_datetime,
calculate_max_parallel_requests,
)
import copy
from litellm._logging import verbose_router_logger
import logging
@ -61,6 +66,7 @@ class Router:
num_retries: int = 0,
timeout: Optional[float] = None,
default_litellm_params={}, # default params for Router.chat.completion.create
default_max_parallel_requests: Optional[int] = None,
set_verbose: bool = False,
debug_level: Literal["DEBUG", "INFO"] = "INFO",
fallbacks: List = [],
@ -198,6 +204,7 @@ class Router:
) # use a dual cache (Redis+In-Memory) for tracking cooldowns, usage, etc.
self.default_deployment = None # use this to track the users default deployment, when they want to use model = *
self.default_max_parallel_requests = default_max_parallel_requests
if model_list:
model_list = copy.deepcopy(model_list)
@ -213,6 +220,7 @@ class Router:
) # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown
self.num_retries = num_retries or litellm.num_retries or 0
self.timeout = timeout or litellm.request_timeout
self.retry_after = retry_after
self.routing_strategy = routing_strategy
self.fallbacks = fallbacks or litellm.fallbacks
@ -298,7 +306,7 @@ class Router:
else:
litellm.failure_callback = [self.deployment_callback_on_failure]
verbose_router_logger.info(
f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}"
f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter Redis Caching={self.cache.redis_cache}"
)
self.routing_strategy_args = routing_strategy_args
@ -496,7 +504,9 @@ class Router:
)
rpm_semaphore = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="rpm_client"
deployment=deployment,
kwargs=kwargs,
client_type="max_parallel_requests",
)
if rpm_semaphore is not None and isinstance(
@ -681,7 +691,9 @@ class Router:
### CONCURRENCY-SAFE RPM CHECKS ###
rpm_semaphore = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="rpm_client"
deployment=deployment,
kwargs=kwargs,
client_type="max_parallel_requests",
)
if rpm_semaphore is not None and isinstance(
@ -803,7 +815,9 @@ class Router:
### CONCURRENCY-SAFE RPM CHECKS ###
rpm_semaphore = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="rpm_client"
deployment=deployment,
kwargs=kwargs,
client_type="max_parallel_requests",
)
if rpm_semaphore is not None and isinstance(
@ -1049,7 +1063,9 @@ class Router:
)
rpm_semaphore = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="rpm_client"
deployment=deployment,
kwargs=kwargs,
client_type="max_parallel_requests",
)
if rpm_semaphore is not None and isinstance(
@ -1243,7 +1259,9 @@ class Router:
### CONCURRENCY-SAFE RPM CHECKS ###
rpm_semaphore = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="rpm_client"
deployment=deployment,
kwargs=kwargs,
client_type="max_parallel_requests",
)
if rpm_semaphore is not None and isinstance(
@ -1862,17 +1880,23 @@ class Router:
model_id = model["model_info"]["id"]
# ### IF RPM SET - initialize a semaphore ###
rpm = litellm_params.get("rpm", None)
if rpm:
semaphore = asyncio.Semaphore(rpm)
cache_key = f"{model_id}_rpm_client"
tpm = litellm_params.get("tpm", None)
max_parallel_requests = litellm_params.get("max_parallel_requests", None)
calculated_max_parallel_requests = calculate_max_parallel_requests(
rpm=rpm,
max_parallel_requests=max_parallel_requests,
tpm=tpm,
default_max_parallel_requests=self.default_max_parallel_requests,
)
if calculated_max_parallel_requests:
semaphore = asyncio.Semaphore(calculated_max_parallel_requests)
cache_key = f"{model_id}_max_parallel_requests_client"
self.cache.set_cache(
key=cache_key,
value=semaphore,
local_only=True,
)
# print("STORES SEMAPHORE IN CACHE")
#### for OpenAI / Azure we need to initialize the Client for High Traffic ########
custom_llm_provider = litellm_params.get("custom_llm_provider")
custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""
@ -2537,8 +2561,8 @@ class Router:
The appropriate client based on the given client_type and kwargs.
"""
model_id = deployment["model_info"]["id"]
if client_type == "rpm_client":
cache_key = "{}_rpm_client".format(model_id)
if client_type == "max_parallel_requests":
cache_key = "{}_max_parallel_requests_client".format(model_id)
client = self.cache.get_cache(key=cache_key, local_only=True)
return client
elif client_type == "async":
@ -2778,6 +2802,7 @@ class Router:
"""
if (
self.routing_strategy != "usage-based-routing-v2"
and self.routing_strategy != "simple-shuffle"
): # prevent regressions for other routing strategies, that don't have async get available deployments implemented.
return self.get_available_deployment(
model=model,
@ -2828,6 +2853,25 @@ class Router:
messages=messages,
input=input,
)
elif self.routing_strategy == "simple-shuffle":
# if users pass rpm or tpm, we do a random weighted pick - based on rpm/tpm
############## Check if we can do a RPM/TPM based weighted pick #################
rpm = healthy_deployments[0].get("litellm_params").get("rpm", None)
if rpm is not None:
# use weight-random pick if rpms provided
rpms = [m["litellm_params"].get("rpm", 0) for m in healthy_deployments]
verbose_router_logger.debug(f"\nrpms {rpms}")
total_rpm = sum(rpms)
weights = [rpm / total_rpm for rpm in rpms]
verbose_router_logger.debug(f"\n weights {weights}")
# Perform weighted random pick
selected_index = random.choices(range(len(rpms)), weights=weights)[0]
verbose_router_logger.debug(f"\n selected index, {selected_index}")
deployment = healthy_deployments[selected_index]
verbose_router_logger.info(
f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}"
)
return deployment or deployment[0]
if deployment is None:
verbose_router_logger.info(

View file

@ -407,13 +407,15 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
tpm_keys.append(tpm_key)
rpm_keys.append(rpm_key)
tpm_values = await self.router_cache.async_batch_get_cache(
keys=tpm_keys
) # [1, 2, None, ..]
rpm_values = await self.router_cache.async_batch_get_cache(
keys=rpm_keys
combined_tpm_rpm_keys = tpm_keys + rpm_keys
combined_tpm_rpm_values = await self.router_cache.async_batch_get_cache(
keys=combined_tpm_rpm_keys
) # [1, 2, None, ..]
tpm_values = combined_tpm_rpm_values[: len(tpm_keys)]
rpm_values = combined_tpm_rpm_values[len(tpm_keys) :]
return self._common_checks_available_deployment(
model_group=model_group,
healthy_deployments=healthy_deployments,

View file

@ -269,6 +269,30 @@ def test_bedrock_claude_3_tool_calling():
assert isinstance(
response.choices[0].message.tool_calls[0].function.arguments, str
)
messages.append(
response.choices[0].message.model_dump()
) # Add assistant tool invokes
tool_result = (
'{"location": "Boston", "temperature": "72", "unit": "fahrenheit"}'
)
# Add user submitted tool results in the OpenAI format
messages.append(
{
"tool_call_id": response.choices[0].message.tool_calls[0].id,
"role": "tool",
"name": response.choices[0].message.tool_calls[0].function.name,
"content": tool_result,
}
)
# In the second response, Claude should deduce answer from tool results
second_response = completion(
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
messages=messages,
tools=tools,
tool_choice="auto",
)
print(f"second response: {second_response}")
assert isinstance(second_response.choices[0].message.content, str)
except RateLimitError:
pass
except Exception as e:

View file

@ -120,6 +120,15 @@ async def test_new_user_response(prisma_client):
await litellm.proxy.proxy_server.prisma_client.connect()
from litellm.proxy.proxy_server import user_api_key_cache
await new_team(
NewTeamRequest(
team_id="ishaan-special-team",
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
),
)
_response = await new_user(
data=NewUserRequest(
models=["azure-gpt-3.5"],
@ -999,10 +1008,32 @@ def test_generate_and_update_key(prisma_client):
async def test():
await litellm.proxy.proxy_server.prisma_client.connect()
# create team "litellm-core-infra@gmail.com"
print("creating team litellm-core-infra@gmail.com")
await new_team(
NewTeamRequest(
team_id="litellm-core-infra@gmail.com",
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
),
)
await new_team(
NewTeamRequest(
team_id="ishaan-special-team",
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
),
)
request = NewUserRequest(
metadata={"team": "litellm-team3", "project": "litellm-project3"},
metadata={"project": "litellm-project3"},
team_id="litellm-core-infra@gmail.com",
)
key = await new_user(request)
print(key)
@ -1015,7 +1046,6 @@ def test_generate_and_update_key(prisma_client):
print("\n info for key=", result["info"])
assert result["info"]["max_parallel_requests"] == None
assert result["info"]["metadata"] == {
"team": "litellm-team3",
"project": "litellm-project3",
}
assert result["info"]["team_id"] == "litellm-core-infra@gmail.com"
@ -1037,7 +1067,7 @@ def test_generate_and_update_key(prisma_client):
# update the team id
response2 = await update_key_fn(
request=Request,
data=UpdateKeyRequest(key=generated_key, team_id="ishaan"),
data=UpdateKeyRequest(key=generated_key, team_id="ishaan-special-team"),
)
print("response2=", response2)
@ -1048,11 +1078,10 @@ def test_generate_and_update_key(prisma_client):
print("\n info for key=", result["info"])
assert result["info"]["max_parallel_requests"] == None
assert result["info"]["metadata"] == {
"team": "litellm-team3",
"project": "litellm-project3",
}
assert result["info"]["models"] == ["ada", "babbage", "curie", "davinci"]
assert result["info"]["team_id"] == "ishaan"
assert result["info"]["team_id"] == "ishaan-special-team"
# cleanup - delete key
delete_key_request = KeyRequest(keys=[generated_key])
@ -1941,6 +1970,15 @@ async def test_master_key_hashing(prisma_client):
await litellm.proxy.proxy_server.prisma_client.connect()
from litellm.proxy.proxy_server import user_api_key_cache
await new_team(
NewTeamRequest(
team_id="ishaans-special-team",
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
),
)
_response = await new_user(
data=NewUserRequest(
models=["azure-gpt-3.5"],

View file

@ -81,7 +81,7 @@ def test_async_fallbacks(caplog):
# Define the expected log messages
# - error request, falling back notice, success notice
expected_logs = [
"Intialized router with Routing strategy: simple-shuffle\n\nRouting fallbacks: [{'gpt-3.5-turbo': ['azure/gpt-3.5-turbo']}]\n\nRouting context window fallbacks: None",
"Intialized router with Routing strategy: simple-shuffle\n\nRouting fallbacks: [{'gpt-3.5-turbo': ['azure/gpt-3.5-turbo']}]\n\nRouting context window fallbacks: None\n\nRouter Redis Caching=None",
"litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m",
"Falling back to model_group = azure/gpt-3.5-turbo",
"litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",

View file

@ -0,0 +1,115 @@
# What is this?
## Unit tests for the max_parallel_requests feature on Router
import sys, os, time, inspect, asyncio, traceback
from datetime import datetime
import pytest
sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.utils import calculate_max_parallel_requests
from typing import Optional
"""
- only rpm
- only tpm
- only max_parallel_requests
- max_parallel_requests + rpm
- max_parallel_requests + tpm
- max_parallel_requests + tpm + rpm
"""
max_parallel_requests_values = [None, 10]
tpm_values = [None, 20, 300000]
rpm_values = [None, 30]
default_max_parallel_requests = [None, 40]
@pytest.mark.parametrize(
"max_parallel_requests, tpm, rpm, default_max_parallel_requests",
[
(mp, tp, rp, dmp)
for mp in max_parallel_requests_values
for tp in tpm_values
for rp in rpm_values
for dmp in default_max_parallel_requests
],
)
def test_scenario(max_parallel_requests, tpm, rpm, default_max_parallel_requests):
calculated_max_parallel_requests = calculate_max_parallel_requests(
max_parallel_requests=max_parallel_requests,
rpm=rpm,
tpm=tpm,
default_max_parallel_requests=default_max_parallel_requests,
)
if max_parallel_requests is not None:
assert max_parallel_requests == calculated_max_parallel_requests
elif rpm is not None:
assert rpm == calculated_max_parallel_requests
elif tpm is not None:
calculated_rpm = int(tpm / 1000 / 6)
if calculated_rpm == 0:
calculated_rpm = 1
print(
f"test calculated_rpm: {calculated_rpm}, calculated_max_parallel_requests={calculated_max_parallel_requests}"
)
assert calculated_rpm == calculated_max_parallel_requests
elif default_max_parallel_requests is not None:
assert calculated_max_parallel_requests == default_max_parallel_requests
else:
assert calculated_max_parallel_requests is None
@pytest.mark.parametrize(
"max_parallel_requests, tpm, rpm, default_max_parallel_requests",
[
(mp, tp, rp, dmp)
for mp in max_parallel_requests_values
for tp in tpm_values
for rp in rpm_values
for dmp in default_max_parallel_requests
],
)
def test_setting_mpr_limits_per_model(
max_parallel_requests, tpm, rpm, default_max_parallel_requests
):
deployment = {
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo",
"max_parallel_requests": max_parallel_requests,
"tpm": tpm,
"rpm": rpm,
},
"model_info": {"id": "my-unique-id"},
}
router = litellm.Router(
model_list=[deployment],
default_max_parallel_requests=default_max_parallel_requests,
)
mpr_client: Optional[asyncio.Semaphore] = router._get_client(
deployment=deployment,
kwargs={},
client_type="max_parallel_requests",
)
if max_parallel_requests is not None:
assert max_parallel_requests == mpr_client._value
elif rpm is not None:
assert rpm == mpr_client._value
elif tpm is not None:
calculated_rpm = int(tpm / 1000 / 6)
if calculated_rpm == 0:
calculated_rpm = 1
print(
f"test calculated_rpm: {calculated_rpm}, calculated_max_parallel_requests={mpr_client._value}"
)
assert calculated_rpm == mpr_client._value
elif default_max_parallel_requests is not None:
assert mpr_client._value == default_max_parallel_requests
else:
assert mpr_client is None
# raise Exception("it worked!")

View file

@ -5434,6 +5434,49 @@ def get_optional_params(
return optional_params
def calculate_max_parallel_requests(
max_parallel_requests: Optional[int],
rpm: Optional[int],
tpm: Optional[int],
default_max_parallel_requests: Optional[int],
) -> Optional[int]:
"""
Returns the max parallel requests to send to a deployment.
Used in semaphore for async requests on router.
Parameters:
- max_parallel_requests - Optional[int] - max_parallel_requests allowed for that deployment
- rpm - Optional[int] - requests per minute allowed for that deployment
- tpm - Optional[int] - tokens per minute allowed for that deployment
- default_max_parallel_requests - Optional[int] - default_max_parallel_requests allowed for any deployment
Returns:
- int or None (if all params are None)
Order:
max_parallel_requests > rpm > tpm / 6 (azure formula) > default max_parallel_requests
Azure RPM formula:
6 rpm per 1000 TPM
https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits
"""
if max_parallel_requests is not None:
return max_parallel_requests
elif rpm is not None:
return rpm
elif tpm is not None:
calculated_rpm = int(tpm / 1000 / 6)
if calculated_rpm == 0:
calculated_rpm = 1
return calculated_rpm
elif default_max_parallel_requests is not None:
return default_max_parallel_requests
return None
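A minimal sketch of the precedence implemented by calculate_max_parallel_requests above (input values are assumed):

```python
# Illustrative only: max_parallel_requests > rpm > tpm-derived value > default.
from litellm.utils import calculate_max_parallel_requests

assert calculate_max_parallel_requests(10, 30, 300000, 40) == 10     # explicit max_parallel_requests wins
assert calculate_max_parallel_requests(None, 30, 300000, 40) == 30   # falls back to rpm
assert calculate_max_parallel_requests(None, None, 300000, 40) == 50  # int(300000 / 1000 / 6)
assert calculate_max_parallel_requests(None, None, None, 40) == 40    # default_max_parallel_requests
```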
def get_api_base(model: str, optional_params: dict) -> Optional[str]:
"""
Returns the api base used for calling the model.

View file

@ -96,9 +96,9 @@ litellm_settings:
router_settings:
routing_strategy: usage-based-routing-v2
redis_host: os.environ/REDIS_HOST
redis_password: os.environ/REDIS_PASSWORD
redis_port: os.environ/REDIS_PORT
# redis_host: os.environ/REDIS_HOST
# redis_password: os.environ/REDIS_PASSWORD
# redis_port: os.environ/REDIS_PORT
enable_pre_call_checks: true
general_settings:

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.35.17"
version = "1.35.18"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.35.17"
version = "1.35.18"
version_files = [
"pyproject.toml:^version"
]

View file

@ -14,6 +14,24 @@ sys.path.insert(
import litellm
async def generate_team(session):
url = "http://0.0.0.0:4000/team/new"
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
data = {
"team_id": "litellm-dashboard",
}
async with session.post(url, headers=headers, json=data) as response:
status = response.status
response_text = await response.text()
print(f"Response (Status code: {status}):")
print(response_text)
print()
_json_response = await response.json()
return _json_response
async def generate_user(
session,
user_role="app_owner",
@ -668,7 +686,7 @@ async def test_key_rate_limit():
@pytest.mark.asyncio
async def test_key_delete():
async def test_key_delete_ui():
"""
Admin UI flow - DO NOT DELETE
-> Create a key with user_id = "ishaan"
@ -680,6 +698,8 @@ async def test_key_delete():
key = key_gen["key"]
# generate a admin UI key
team = await generate_team(session=session)
print("generated team: ", team)
admin_ui_key = await generate_user(session=session, user_role="proxy_admin")
print(
"trying to delete key=",

View file

@ -260,7 +260,10 @@ async def test_chat_completion_ratelimit():
await asyncio.gather(*tasks)
pytest.fail("Expected at least 1 call to fail")
except Exception as e:
if "Request did not return a 200 status code: 429" in str(e):
pass
else:
pytest.fail(f"Wrong error received - {str(e)}")
@pytest.mark.asyncio

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[16586,[\"968\",\"static/chunks/968-0cc23fee51b47e4e.js\",\"931\",\"static/chunks/app/page-93ac11fb17dce9d6.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"Oe7aA-U7OV9Y13gspREJQ\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75b5d58291566cf9.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[65249,[\"968\",\"static/chunks/968-0cc23fee51b47e4e.js\",\"931\",\"static/chunks/app/page-6ba29bc4256320f4.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/dc347b0d22ffde5d.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"Vjlnu8AomhCFg4fkGtcUs\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[16586,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-93ac11fb17dce9d6.js"],""]
3:I[65249,["968","static/chunks/968-0cc23fee51b47e4e.js","931","static/chunks/app/page-6ba29bc4256320f4.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["Oe7aA-U7OV9Y13gspREJQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["Vjlnu8AomhCFg4fkGtcUs",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/dc347b0d22ffde5d.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@@ -9,6 +9,7 @@ import Teams from "@/components/teams";
import AdminPanel from "@/components/admins";
import Settings from "@/components/settings";
import GeneralSettings from "@/components/general_settings";
import APIRef from "@/components/api_ref";
import ChatUI from "@/components/chat_ui";
import Sidebar from "../components/leftnav";
import Usage from "../components/usage";
@@ -165,6 +166,8 @@ const CreateKeyPage = () => {
accessToken={accessToken}
showSSOBanner={showSSOBanner}
/>
) : page == "api_ref" ? (
<APIRef/>
) : page == "settings" ? (
<Settings
userID={userID}
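The new page is rendered when the sidebar sets `page === "api_ref"`, extending the existing ternary chain in page.tsx. A minimal sketch of how the pieces connect is below; the `page`/`setPage` names and the `APIRef` import match this diff, while the reduced component itself is illustrative and not part of the commit:

```tsx
// Sketch only: the sidebar's Menu.Item (see leftnav.tsx below) calls
// setPage("api_ref"), and the ternary chain in page.tsx renders <APIRef/>
// for that value. The standalone component here is illustrative.
import React, { useState } from "react";
import APIRef from "@/components/api_ref";

const Example = () => {
  const [page, setPage] = useState("keys"); // initial value is illustrative
  return (
    <>
      <button onClick={() => setPage("api_ref")}>API Reference</button>
      {page == "api_ref" ? <APIRef /> : null}
    </>
  );
};
```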

View file

@@ -0,0 +1,152 @@
"use client";
import React, { useEffect, useState } from "react";
import {
Badge,
Card,
Table,
Metric,
TableBody,
TableCell,
TableHead,
TableHeaderCell,
TableRow,
Text,
Title,
Icon,
Accordion,
AccordionBody,
AccordionHeader,
List,
ListItem,
Tab,
TabGroup,
TabList,
TabPanel,
TabPanels,
Grid,
} from "@tremor/react";
import { Statistic } from "antd"
import { modelAvailableCall } from "./networking";
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
const APIRef = ({}) => {
return (
<>
<Grid className="gap-2 p-8 h-[80vh] w-full mt-2">
<div className="mb-5">
<p className="text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">OpenAI Compatible Proxy: API Reference</p>
<Text className="mt-2 mb-2">LiteLLM is OpenAI Compatible. This means your API Key works with the OpenAI SDK. Just replace the base_url to point to your litellm proxy. Example Below </Text>
<TabGroup>
<TabList>
<Tab>OpenAI Python SDK</Tab>
<Tab>LlamaIndex</Tab>
<Tab>Langchain Py</Tab>
</TabList>
<TabPanels>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import openai
client = openai.OpenAI(
api_key="your_api_key",
base_url="http://0.0.0.0:4000" # LiteLLM Proxy is OpenAI compatible, Read More: https://docs.litellm.ai/docs/proxy/user_keys
)
response = client.chat.completions.create(
model="gpt-3.5-turbo", # model to send to the proxy
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
]
)
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import os, dotenv
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
llm = AzureOpenAI(
engine="azure-gpt-3.5", # model_name on litellm proxy
temperature=0.0,
azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint
api_key="sk-1234", # litellm proxy API Key
api_version="2023-07-01-preview",
)
embed_model = AzureOpenAIEmbedding(
deployment_name="azure-embedding-model",
azure_endpoint="http://0.0.0.0:4000",
api_key="sk-1234",
api_version="2023-07-01-preview",
)
documents = SimpleDirectoryReader("llama_index_data").load_data()
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
chat = ChatOpenAI(
openai_api_base="http://0.0.0.0:4000",
model = "gpt-3.5-turbo",
temperature=0.1
)
messages = [
SystemMessage(
content="You are a helpful assistant that im using to make a test request to."
),
HumanMessage(
content="test from litellm. tell me why it's amazing in 1 sentence"
),
]
response = chat(messages)
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
</TabPanels>
</TabGroup>
</div>
</Grid>
</>
)
}
export default APIRef;
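The tabs above cover Python clients only. For completeness, a minimal TypeScript equivalent using the `openai` npm package against the same proxy; the endpoint, key, and model values mirror the examples above, and the snippet is illustrative rather than part of this diff:

```typescript
// Illustrative only: calling the LiteLLM proxy from TypeScript with the
// official openai SDK (npm install openai). Values match the tabs above.
import OpenAI from "openai";

const client = new OpenAI({
  apiKey: "sk-1234",              // litellm proxy API key
  baseURL: "http://0.0.0.0:4000", // litellm proxy endpoint
});

async function main() {
  const response = await client.chat.completions.create({
    model: "gpt-3.5-turbo", // model name configured on the proxy
    messages: [
      { role: "user", content: "this is a test request, write a short poem" },
    ],
  });
  console.log(response.choices[0]?.message?.content);
}

main().catch(console.error);
```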

View file

@@ -13,12 +13,12 @@ import {
TabGroup,
TabList,
TabPanel,
TabPanels,
Metric,
Col,
Text,
SelectItem,
TextInput,
TabPanels,
Button,
} from "@tremor/react";
@@ -201,7 +201,6 @@ const ChatUI: React.FC<ChatUIProps> = ({
<TabGroup>
<TabList>
<Tab>Chat</Tab>
<Tab>API Reference</Tab>
</TabList>
<TabPanels>
@@ -272,124 +271,7 @@ const ChatUI: React.FC<ChatUIProps> = ({
</div>
</div>
</TabPanel>
<TabPanel>
<TabGroup>
<TabList>
<Tab>OpenAI Python SDK</Tab>
<Tab>LlamaIndex</Tab>
<Tab>Langchain Py</Tab>
</TabList>
<TabPanels>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import openai
client = openai.OpenAI(
api_key="your_api_key",
base_url="http://0.0.0.0:4000" # proxy base url
)
response = client.chat.completions.create(
model="gpt-3.5-turbo", # model to use from Models Tab
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
extra_body={
"metadata": {
"generation_name": "ishaan-generation-openai-client",
"generation_id": "openai-client-gen-id22",
"trace_id": "openai-client-trace-id22",
"trace_user_id": "openai-client-user-id2"
}
}
)
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import os, dotenv
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
llm = AzureOpenAI(
engine="azure-gpt-3.5", # model_name on litellm proxy
temperature=0.0,
azure_endpoint="http://0.0.0.0:4000", # litellm proxy endpoint
api_key="sk-1234", # litellm proxy API Key
api_version="2023-07-01-preview",
)
embed_model = AzureOpenAIEmbedding(
deployment_name="azure-embedding-model",
azure_endpoint="http://0.0.0.0:4000",
api_key="sk-1234",
api_version="2023-07-01-preview",
)
documents = SimpleDirectoryReader("llama_index_data").load_data()
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
chat = ChatOpenAI(
openai_api_base="http://0.0.0.0:8000",
model = "gpt-3.5-turbo",
temperature=0.1,
extra_body={
"metadata": {
"generation_name": "ishaan-generation-langchain-client",
"generation_id": "langchain-client-gen-id22",
"trace_id": "langchain-client-trace-id22",
"trace_user_id": "langchain-client-user-id2"
}
}
)
messages = [
SystemMessage(
content="You are a helpful assistant that im using to make a test request to."
),
HumanMessage(
content="test from litellm. tell me why it's amazing in 1 sentence"
),
]
response = chat(messages)
print(response)
`}
</SyntaxHighlighter>
</TabPanel>
</TabPanels>
</TabGroup>
</TabPanel>
</TabPanels>
</TabGroup>
</Card>

View file

@@ -2,7 +2,7 @@
import React, { useState, useEffect, useRef } from "react";
import { Button, TextInput, Grid, Col } from "@tremor/react";
import { Card, Metric, Text, Title, Subtitle } from "@tremor/react";
import { Card, Metric, Text, Title, Subtitle, Accordion, AccordionHeader, AccordionBody, } from "@tremor/react";
import { CopyToClipboard } from 'react-copy-to-clipboard';
import {
Button as Button2,
@@ -147,6 +147,17 @@ const CreateKey: React.FC<CreateKeyProps> = ({
mode="multiple"
placeholder="Select models"
style={{ width: "100%" }}
onChange={(values) => {
// Check if "All Team Models" is selected
const isAllTeamModelsSelected = values.includes("all-team-models");
// If "All Team Models" is selected, deselect all other models
if (isAllTeamModelsSelected) {
const newValues = ["all-team-models"];
// You can call the form's setFieldsValue method to update the value
form.setFieldsValue({ models: newValues });
}
}}
>
<Option key="all-team-models" value="all-team-models">
All Team Models
@@ -248,16 +259,153 @@ const CreateKey: React.FC<CreateKeyProps> = ({
</>
) : (
<>
<Form.Item label="Key Name" name="key_alias">
<Form.Item
label="Key Name"
name="key_alias"
rules={[{ required: true, message: 'Please input a key name' }]}
help="required"
>
<Input />
</Form.Item>
<Form.Item label="Team ID (Contact Group)" name="team_id">
<Input placeholder="default team (create a new team)" />
<Form.Item
label="Team ID"
name="team_id"
hidden={true}
initialValue={team ? team["team_id"] : null}
valuePropName="team_id"
className="mt-8"
>
<Input value={team ? team["team_alias"] : ""} disabled />
</Form.Item>
<Form.Item label="Description" name="description">
<Input.TextArea placeholder="Enter description" rows={4} />
<Form.Item
label="Models"
name="models"
className="mb-12"
rules={[{ required: true, message: 'Please select a model' }]}
help="required"
>
<Select
mode="multiple"
placeholder="Select models"
style={{ width: "100%" }}
onChange={(values) => {
const isAllTeamModelsSelected = values.includes("all-team-models");
if (isAllTeamModelsSelected) {
const newValues = ["all-team-models"];
form.setFieldsValue({ models: newValues });
}
}}
>
<Option key="all-team-models" value="all-team-models">
All Team Models
</Option>
{team && team.models ? (
team.models.includes("all-proxy-models") ? (
userModels.map((model: string) => (
(
<Option key={model} value={model}>
{model}
</Option>
)
))
) : (
team.models.map((model: string) => (
<Option key={model} value={model}>
{model}
</Option>
))
)
) : (
userModels.map((model: string) => (
<Option key={model} value={model}>
{model}
</Option>
))
)}
</Select>
</Form.Item>
<Accordion className="mt-20 mb-8" >
<AccordionHeader>
<b>Optional Settings</b>
</AccordionHeader>
<AccordionBody>
<Form.Item
className="mt-8"
label="Max Budget (USD)"
name="max_budget"
help={`Budget cannot exceed team max budget: $${team?.max_budget !== null && team?.max_budget !== undefined ? team?.max_budget : 'unlimited'}`}
rules={[
{
validator: async (_, value) => {
if (value && team && team.max_budget !== null && value > team.max_budget) {
throw new Error(`Budget cannot exceed team max budget: $${team.max_budget}`);
}
},
},
]}
>
<InputNumber step={0.01} precision={2} width={200} />
</Form.Item>
<Form.Item
className="mt-8"
label="Reset Budget"
name="budget_duration"
help={`Team Reset Budget: ${team?.budget_duration !== null && team?.budget_duration !== undefined ? team?.budget_duration : 'None'}`}
>
<Select defaultValue={null} placeholder="n/a">
<Select.Option value="24h">daily</Select.Option>
<Select.Option value="30d">monthly</Select.Option>
</Select>
</Form.Item>
<Form.Item
className="mt-8"
label="Tokens per minute Limit (TPM)"
name="tpm_limit"
help={`TPM cannot exceed team TPM limit: ${team?.tpm_limit !== null && team?.tpm_limit !== undefined ? team?.tpm_limit : 'unlimited'}`}
rules={[
{
validator: async (_, value) => {
if (value && team && team.tpm_limit !== null && value > team.tpm_limit) {
throw new Error(`TPM limit cannot exceed team TPM limit: ${team.tpm_limit}`);
}
},
},
]}
>
<InputNumber step={1} width={400} />
</Form.Item>
<Form.Item
className="mt-8"
label="Requests per minute Limit (RPM)"
name="rpm_limit"
help={`RPM cannot exceed team RPM limit: ${team?.rpm_limit !== null && team?.rpm_limit !== undefined ? team?.rpm_limit : 'unlimited'}`}
rules={[
{
validator: async (_, value) => {
if (value && team && team.rpm_limit !== null && value > team.rpm_limit) {
throw new Error(`RPM limit cannot exceed team RPM limit: ${team.rpm_limit}`);
}
},
},
]}
>
<InputNumber step={1} width={400} />
</Form.Item>
<Form.Item label="Expire Key (eg: 30s, 30h, 30d)" name="duration" className="mt-8">
<Input />
</Form.Item>
<Form.Item label="Metadata" name="metadata">
<Input.TextArea rows={4} placeholder="Enter metadata as JSON" />
</Form.Item>
</AccordionBody>
</Accordion>
</>
)}
<div style={{ textAlign: "right", marginTop: "10px" }}>
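The max_budget, tpm_limit, and rpm_limit fields above all use the same "value must not exceed the team limit" validator, repeated inline per field. A hedged sketch of a reusable rule factory for that pattern follows; it is not part of the commit, which keeps the three inline validators:

```typescript
// Sketch of a reusable antd Form rule for the three team-limit checks above.
// The inline validators in this diff are equivalent, just repeated per field.
const teamLimitRule = (
  team: any,
  field: "max_budget" | "tpm_limit" | "rpm_limit",
  label: string
) => ({
  validator: async (_: any, value: number) => {
    const limit = team?.[field];
    if (value != null && limit != null && value > limit) {
      throw new Error(`${label} cannot exceed team limit: ${limit}`);
    }
  },
});

// Usage: rules={[teamLimitRule(team, "tpm_limit", "TPM limit")]}
```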

View file

@@ -4,6 +4,7 @@ import { Select, SelectItem, Text, Title } from "@tremor/react";
interface DashboardTeamProps {
teams: Object[] | null;
setSelectedTeam: React.Dispatch<React.SetStateAction<any | null>>;
userRole: string | null;
}
type TeamInterface = {
@@ -15,6 +16,7 @@ type TeamInterface = {
const DashboardTeam: React.FC<DashboardTeamProps> = ({
teams,
setSelectedTeam,
userRole,
}) => {
const defaultTeam: TeamInterface = {
models: [],
@@ -25,19 +27,26 @@ const DashboardTeam: React.FC<DashboardTeamProps> = ({
const [value, setValue] = useState(defaultTeam);
const updatedTeams = teams ? [...teams, defaultTeam] : [defaultTeam];
let updatedTeams;
if (userRole === "App User") {
// Non-Admin SSO users should only see their own team - they should not see "Default Team"
updatedTeams = teams;
} else {
updatedTeams = teams ? [...teams, defaultTeam] : [defaultTeam];
}
if (userRole === 'App User') return null;
return (
<div className="mt-5 mb-5">
<Title>Select Team</Title>
<Text>
If you belong to multiple teams, this setting controls which team is
used by default when creating new API Keys.
If you belong to multiple teams, this setting controls which team is used by default when creating new API Keys.
</Text>
<Text className="mt-3 mb-3">
        <b>Default Team:</b> If no team_id is set for a key, it will be grouped under the Default Team.
</Text>
{updatedTeams && updatedTeams.length > 0 ? (
<Select defaultValue="0">
{updatedTeams.map((team: any, index) => (

View file

@@ -46,8 +46,8 @@ const Sidebar: React.FC<SidebarProps> = ({
);
}
return (
<Layout style={{ minHeight: "100vh", maxWidth: "120px" }}>
<Sider width={120}>
<Layout style={{ minHeight: "100vh", maxWidth: "130px" }}>
<Sider width={130}>
<Menu
mode="inline"
defaultSelectedKeys={defaultSelectedKey ? defaultSelectedKey : ["1"]}
@@ -63,11 +63,23 @@ const Sidebar: React.FC<SidebarProps> = ({
Test Key
</Text>
</Menu.Item>
<Menu.Item key="11" onClick={() => setPage("api_ref")}>
<Text>
API Reference
</Text>
</Menu.Item>
{
userRole == "Admin" ? (
<Menu.Item key="2" onClick={() => setPage("models")}>
<Text>
Models
</Text>
</Menu.Item>
) : null
}
{userRole == "Admin" ? (
<Menu.Item key="6" onClick={() => setPage("teams")}>
<Text>
@@ -75,11 +87,18 @@ const Sidebar: React.FC<SidebarProps> = ({
</Text>
</Menu.Item>
) : null}
{
userRole == "Admin" ? (
<Menu.Item key="4" onClick={() => setPage("usage")}>
<Text>
Usage
</Text>
</Menu.Item>
) : null
}
{userRole == "Admin" ? (
<Menu.Item key="5" onClick={() => setPage("users")}>
<Text>
@@ -87,16 +106,27 @@ const Sidebar: React.FC<SidebarProps> = ({
</Text>
</Menu.Item>
) : null}
{
userRole == "Admin" ? (
<Menu.Item key="8" onClick={() => setPage("settings")}>
<Text>
Integrations
</Text>
</Menu.Item>
) : null
}
{
userRole == "Admin" ? (
<Menu.Item key="9" onClick={() => setPage("general-settings")}>
<Text>
Settings
</Text>
</Menu.Item>
) : null
}
{userRole == "Admin" ? (
<Menu.Item key="7" onClick={() => setPage("admin-panel")}>
<Text>
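The sidebar changes above repeat the same `userRole == "Admin" ? <Menu.Item …/> : null` guard for Models, Teams, Usage, Users, Integrations, Settings, and the Admin Panel. A small hedged sketch of one way to collapse that repetition; the `adminOnly` helper is illustrative and not part of this commit:

```tsx
// Sketch only: collapsing the repeated admin-only guards in leftnav.tsx.
const adminOnly = (userRole: string | null, item: JSX.Element) =>
  userRole == "Admin" ? item : null;

// Usage inside <Menu>:
// {adminOnly(userRole, (
//   <Menu.Item key="2" onClick={() => setPage("models")}>
//     <Text>Models</Text>
//   </Menu.Item>
// ))}
```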

View file

@@ -296,6 +296,9 @@ export const userInfoCall = async (
if (userRole == "App Owner" && userID) {
url = `${url}?user_id=${userID}`;
}
if (userRole == "App User" && userID) {
url = `${url}?user_id=${userID}`;
}
console.log("in userInfoCall viewAll=", viewAll);
if (viewAll && page_size && (page != null) && (page != undefined)) {
url = `${url}?view_all=true&page=${page}&page_size=${page_size}`;
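The new "App User" branch sets the same query parameter as the existing "App Owner" branch, so the two checks could be merged. A sketch of the combined condition, for illustration only; the commit keeps them as separate ifs:

```typescript
// Sketch: equivalent to the two role branches above in userInfoCall.
if ((userRole == "App Owner" || userRole == "App User") && userID) {
  url = `${url}?user_id=${userID}`;
}
```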

View file

@@ -5,6 +5,7 @@ import { Grid, Col, Card, Text, Title } from "@tremor/react";
import CreateKey from "./create_key_button";
import ViewKeyTable from "./view_key_table";
import ViewUserSpend from "./view_user_spend";
import ViewUserTeam from "./view_user_team";
import DashboardTeam from "./dashboard_default_team";
import { useSearchParams, useRouter } from "next/navigation";
import { jwtDecode } from "jwt-decode";
@@ -232,11 +233,19 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
<div className="w-full mx-4">
<Grid numItems={1} className="gap-2 p-8 h-[75vh] w-full mt-2">
<Col numColSpan={1}>
<ViewUserTeam
userID={userID}
userRole={userRole}
selectedTeam={selectedTeam ? selectedTeam : null}
accessToken={accessToken}
/>
<ViewUserSpend
userID={userID}
userRole={userRole}
accessToken={accessToken}
userSpend={teamSpend}
selectedTeam = {selectedTeam ? selectedTeam : null}
/>
<ViewKeyTable
@@ -257,7 +266,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
data={keys}
setData={setKeys}
/>
<DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} />
<DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} userRole={userRole}/>
</Col>
</Grid>
</div>

View file

@@ -2,7 +2,7 @@
import React, { useEffect, useState } from "react";
import { keyDeleteCall, getTotalSpendCall } from "./networking";
import { StatusOnlineIcon, TrashIcon } from "@heroicons/react/outline";
import { DonutChart } from "@tremor/react";
import { Accordion, AccordionHeader, AccordionList, DonutChart } from "@tremor/react";
import {
Badge,
Card,
@@ -16,9 +16,13 @@ import {
Text,
Title,
Icon,
AccordionBody,
List,
ListItem,
} from "@tremor/react";
import { Statistic } from "antd"
import { spendUsersCall } from "./networking";
import { spendUsersCall, modelAvailableCall } from "./networking";
// Define the props type
@@ -32,11 +36,13 @@ interface ViewUserSpendProps {
userRole: string | null;
accessToken: string | null;
userSpend: number | null;
selectedTeam: any | null;
}
const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessToken, userSpend }) => {
const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessToken, userSpend, selectedTeam }) => {
console.log(`userSpend: ${userSpend}`)
let [spend, setSpend] = useState(userSpend !== null ? userSpend : 0.0);
const [maxBudget, setMaxBudget] = useState(0.0);
const [userModels, setUserModels] = useState([]);
useEffect(() => {
const fetchData = async () => {
if (!accessToken || !userID || !userRole) {
@@ -62,9 +68,30 @@ const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessT
}
}
};
const fetchUserModels = async () => {
try {
if (userID === null || userRole === null) {
return;
}
if (accessToken !== null) {
const model_available = await modelAvailableCall(accessToken, userID, userRole);
let available_model_names = model_available["data"].map(
(element: { id: string }) => element.id
);
console.log("available_model_names:", available_model_names);
setUserModels(available_model_names);
}
} catch (error) {
console.error("Error fetching user models:", error);
}
};
fetchUserModels();
fetchData();
}, [userRole, accessToken]);
}, [userRole, accessToken, userID]);
useEffect(() => {
if (userSpend !== null) {
@@ -72,18 +99,50 @@ const ViewUserSpend: React.FC<ViewUserSpendProps> = ({ userID, userRole, accessT
}
}, [userSpend])
// logic to decide what models to display
let modelsToDisplay = [];
if (selectedTeam && selectedTeam.models) {
modelsToDisplay = selectedTeam.models;
}
// check if "all-proxy-models" is in modelsToDisplay
if (modelsToDisplay && modelsToDisplay.includes("all-proxy-models")) {
console.log("user models:", userModels);
modelsToDisplay = userModels;
}
const displayMaxBudget = maxBudget !== null ? `$${maxBudget} limit` : "No limit";
const roundedSpend = spend !== undefined ? spend.toFixed(4) : null;
console.log(`spend in view user spend: ${spend}`)
return (
<>
<p className="text-tremor-default text-tremor-content dark:text-dark-tremor-content">Total Spend </p>
<p className="text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">${roundedSpend}</p>
</>
)
<div className="flex items-center">
<div>
<p className="text-tremor-default text-tremor-content dark:text-dark-tremor-content">
Total Spend{" "}
</p>
<p className="text-2xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">
${roundedSpend}
</p>
</div>
<div className="ml-auto">
<Accordion>
<AccordionHeader>Models</AccordionHeader>
<AccordionBody className="absolute right-0 z-10 bg-white p-2 shadow-lg max-w-xs">
<List>
{modelsToDisplay.map((model: string) => (
<ListItem key={model}>
<Text>{model}</Text>
</ListItem>
))}
</List>
</AccordionBody>
</Accordion>
</div>
</div>
);
}
export default ViewUserSpend;

View file

@@ -0,0 +1,78 @@
"use client";
import React, { useEffect, useState } from "react";
import {
Badge,
Card,
Table,
Metric,
TableBody,
TableCell,
TableHead,
TableHeaderCell,
TableRow,
Text,
Title,
Icon,
Accordion,
AccordionBody,
AccordionHeader,
List,
ListItem,
} from "@tremor/react";
import { Statistic } from "antd"
import { modelAvailableCall } from "./networking";
interface ViewUserTeamProps {
userID: string | null;
userRole: string | null;
selectedTeam: any | null;
accessToken: string | null;
}
const ViewUserTeam: React.FC<ViewUserTeamProps> = ({ userID, userRole, selectedTeam, accessToken}) => {
const [userModels, setUserModels] = useState([]);
useEffect(() => {
const fetchUserModels = async () => {
try {
if (userID === null || userRole === null) {
return;
}
if (accessToken !== null) {
const model_available = await modelAvailableCall(accessToken, userID, userRole);
let available_model_names = model_available["data"].map(
(element: { id: string }) => element.id
);
console.log("available_model_names:", available_model_names);
setUserModels(available_model_names);
}
} catch (error) {
console.error("Error fetching user models:", error);
}
};
fetchUserModels();
}, [accessToken, userID, userRole]);
// logic to decide what models to display
let modelsToDisplay = [];
if (selectedTeam && selectedTeam.models) {
modelsToDisplay = selectedTeam.models;
}
// check if "all-proxy-models" is in modelsToDisplay
if (modelsToDisplay && modelsToDisplay.includes("all-proxy-models")) {
console.log("user models:", userModels);
modelsToDisplay = userModels;
}
return (
<>
<div className="mb-5">
<p className="text-3xl text-tremor-content-strong dark:text-dark-tremor-content-strong font-semibold">{selectedTeam?.team_alias}</p>
</div>
</>
)
}
export default ViewUserTeam;
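Both view_user_spend.tsx and the new view_user_team.tsx above inline the same "which models to display" logic: take the selected team's models, and expand the "all-proxy-models" sentinel to the user's available models. A hedged sketch of a shared helper for that logic; it is illustrative only, as the commit keeps the logic inlined in both components:

```typescript
// Sketch of a shared helper for the duplicated model-display logic in
// view_user_spend.tsx and view_user_team.tsx. Not part of this commit.
export function resolveModelsToDisplay(
  selectedTeam: { models?: string[] } | null,
  userModels: string[]
): string[] {
  const teamModels = selectedTeam?.models ?? [];
  // "all-proxy-models" means: show every model the user can access
  if (teamModels.includes("all-proxy-models")) {
    return userModels;
  }
  return teamModels;
}
```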