Merge branch 'BerriAI:main' into main

Author: CyanideByte
Date: 2024-04-27 20:51:33 -07:00
Commit: 82be9a7e67 (committed by GitHub)
30 changed files with 341 additions and 183 deletions


@@ -1,13 +1,13 @@
-# Slack Alerting
+# 🚨 Alerting

 Get alerts for:

-- hanging LLM api calls
-- failed LLM api calls
-- slow LLM api calls
-- budget Tracking per key/user:
+- Hanging LLM api calls
+- Failed LLM api calls
+- Slow LLM api calls
+- Budget Tracking per key/user:
   - When a User/Key crosses their Budget
   - When a User/Key is 15% away from crossing their Budget
-- failed db read/writes
+- Failed db read/writes

 ## Quick Start


@@ -43,6 +43,12 @@ const sidebars = {
         "proxy/user_keys",
         "proxy/enterprise",
         "proxy/virtual_keys",
+        "proxy/alerting",
+        {
+          type: "category",
+          label: "Logging",
+          items: ["proxy/logging", "proxy/streaming_logging"],
+        },
         "proxy/team_based_routing",
         "proxy/ui",
         "proxy/cost_tracking",
@@ -58,11 +64,6 @@ const sidebars = {
         "proxy/pii_masking",
         "proxy/prompt_injection",
         "proxy/caching",
-        {
-          type: "category",
-          label: "Logging, Alerting",
-          items: ["proxy/logging", "proxy/alerting", "proxy/streaming_logging"],
-        },
         "proxy/prometheus",
         "proxy/call_hooks",
         "proxy/rules",


@@ -12,7 +12,9 @@ import litellm

 class LangFuseLogger:
     # Class variables or attributes
-    def __init__(self, langfuse_public_key=None, langfuse_secret=None):
+    def __init__(
+        self, langfuse_public_key=None, langfuse_secret=None, flush_interval=1
+    ):
         try:
             from langfuse import Langfuse
         except Exception as e:
@@ -31,7 +33,7 @@ class LangFuseLogger:
             host=self.langfuse_host,
             release=self.langfuse_release,
             debug=self.langfuse_debug,
-            flush_interval=1,  # flush interval in seconds
+            flush_interval=flush_interval,  # flush interval in seconds
         )

         # set the current langfuse project id in the environ
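Net effect of the two hunks above: `LangFuseLogger` now exposes the Langfuse SDK's `flush_interval` instead of hard-coding one second, so callers that need alert traces delivered promptly can keep the aggressive flush while other callers batch. A minimal usage sketch, grounded in the constructor shown above (this is a litellm-internal class, not public API):

```python
import os

from litellm.integrations.langfuse import LangFuseLogger

# flush_interval=1 asks the underlying Langfuse client to flush queued
# events roughly every second instead of its default batching window.
logger = LangFuseLogger(
    os.getenv("LANGFUSE_PUBLIC_KEY"),
    os.getenv("LANGFUSE_SECRET_KEY"),
    flush_interval=1,
)
```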


@@ -12,6 +12,7 @@ from litellm.caching import DualCache
 import asyncio
 import aiohttp
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+import datetime


 class SlackAlerting:
@@ -47,6 +48,18 @@ class SlackAlerting:
         self.internal_usage_cache = DualCache()
         self.async_http_handler = AsyncHTTPHandler()
         self.alert_to_webhook_url = alert_to_webhook_url
+        self.langfuse_logger = None
+        try:
+            from litellm.integrations.langfuse import LangFuseLogger
+
+            self.langfuse_logger = LangFuseLogger(
+                os.getenv("LANGFUSE_PUBLIC_KEY"),
+                os.getenv("LANGFUSE_SECRET_KEY"),
+                flush_interval=1,
+            )
+        except:
+            pass
+
         pass
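The constructor addition treats Langfuse as a strictly optional dependency: if the import or key lookup fails, `self.langfuse_logger` stays `None` and alerting proceeds without trace links. The pattern in isolation (the class name here is illustrative, not litellm's):

```python
import os


class AlertingClient:
    """Illustrative: optional-integration init that never fails startup."""

    def __init__(self):
        self.langfuse_logger = None
        try:
            # Import inside the try so a missing `langfuse` package
            # degrades gracefully instead of crashing the proxy.
            from litellm.integrations.langfuse import LangFuseLogger

            self.langfuse_logger = LangFuseLogger(
                os.getenv("LANGFUSE_PUBLIC_KEY"),
                os.getenv("LANGFUSE_SECRET_KEY"),
                flush_interval=1,
            )
        except Exception:
            pass  # callers must check `if self.langfuse_logger is not None`
```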
@@ -93,39 +106,68 @@ class SlackAlerting:
         request_info: str,
         request_data: Optional[dict] = None,
         kwargs: Optional[dict] = None,
+        type: Literal["hanging_request", "slow_response"] = "hanging_request",
+        start_time: Optional[datetime.datetime] = None,
+        end_time: Optional[datetime.datetime] = None,
     ):
         import uuid

         # For now: do nothing as we're debugging why this is not working as expected
+        if request_data is not None:
+            trace_id = request_data.get("metadata", {}).get(
+                "trace_id", None
+            )  # get langfuse trace id
+            if trace_id is None:
+                trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
+                request_data["metadata"]["trace_id"] = trace_id
+        elif kwargs is not None:
+            _litellm_params = kwargs.get("litellm_params", {})
+            trace_id = _litellm_params.get("metadata", {}).get(
+                "trace_id", None
+            )  # get langfuse trace id
+            if trace_id is None:
+                trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
+                _litellm_params["metadata"]["trace_id"] = trace_id
+
+        # Log hanging request as an error on langfuse
+        if type == "hanging_request":
+            if self.langfuse_logger is not None:
+                _logging_kwargs = copy.deepcopy(request_data)
+                if _logging_kwargs is None:
+                    _logging_kwargs = {}
+                _logging_kwargs["litellm_params"] = {}
+                request_data = request_data or {}
+                _logging_kwargs["litellm_params"]["metadata"] = request_data.get(
+                    "metadata", {}
+                )
+                # log to langfuse in a separate thread
+                import threading
+
+                threading.Thread(
+                    target=self.langfuse_logger.log_event,
+                    args=(
+                        _logging_kwargs,
+                        None,
+                        start_time,
+                        end_time,
+                        None,
+                        print,
+                        "ERROR",
+                        "Requests is hanging",
+                    ),
+                ).start()
+
+        _langfuse_host = os.environ.get("LANGFUSE_HOST", "https://cloud.langfuse.com")
+        _langfuse_project_id = os.environ.get("LANGFUSE_PROJECT_ID")
+
+        # langfuse urls look like: https://us.cloud.langfuse.com/project/************/traces/litellm-alert-trace-ididi9dk-09292-************
+        _langfuse_url = (
+            f"{_langfuse_host}/project/{_langfuse_project_id}/traces/{trace_id}"
+        )
+        request_info += f"\n🪢 Langfuse Trace: {_langfuse_url}"
         return request_info

-        # if request_data is not None:
-        #     trace_id = request_data.get("metadata", {}).get(
-        #         "trace_id", None
-        #     )  # get langfuse trace id
-        #     if trace_id is None:
-        #         trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
-        #         request_data["metadata"]["trace_id"] = trace_id
-        # elif kwargs is not None:
-        #     _litellm_params = kwargs.get("litellm_params", {})
-        #     trace_id = _litellm_params.get("metadata", {}).get(
-        #         "trace_id", None
-        #     )  # get langfuse trace id
-        #     if trace_id is None:
-        #         trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
-        #         _litellm_params["metadata"]["trace_id"] = trace_id
-
-        # _langfuse_host = os.environ.get("LANGFUSE_HOST", "https://cloud.langfuse.com")
-        # _langfuse_project_id = os.environ.get("LANGFUSE_PROJECT_ID")
-
-        # # langfuse urls look like: https://us.cloud.langfuse.com/project/************/traces/litellm-alert-trace-ididi9dk-09292-************
-        # _langfuse_url = (
-        #     f"{_langfuse_host}/project/{_langfuse_project_id}/traces/{trace_id}"
-        # )
-        # request_info += f"\n🪢 Langfuse Trace: {_langfuse_url}"
-        # return request_info

     def _response_taking_too_long_callback(
         self,
         kwargs,  # kwargs to completion
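The activated function does two things: it pins a stable `trace_id` into the request metadata (reusing one if the caller already set it) and appends a Langfuse deep link to the alert text. The URL scheme is the one in the comment above; extracted as a hypothetical standalone helper:

```python
import os
import uuid


def build_langfuse_trace_url(metadata: dict) -> str:
    """Hypothetical helper mirroring the logic above."""
    # Reuse the caller's trace id when present so the alert links to the
    # same trace that normal success/failure logging writes to.
    trace_id = metadata.get("trace_id")
    if trace_id is None:
        trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
        metadata["trace_id"] = trace_id

    host = os.environ.get("LANGFUSE_HOST", "https://cloud.langfuse.com")
    project_id = os.environ.get("LANGFUSE_PROJECT_ID")
    return f"{host}/project/{project_id}/traces/{trace_id}"
```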
@@ -167,6 +209,14 @@ class SlackAlerting:
             _deployment_latencies = metadata["_latency_per_deployment"]
             if len(_deployment_latencies) == 0:
                 return None
+            try:
+                # try sorting deployments by latency
+                _deployment_latencies = sorted(
+                    _deployment_latencies.items(), key=lambda x: x[1]
+                )
+                _deployment_latencies = dict(_deployment_latencies)
+            except:
+                pass
             for api_base, latency in _deployment_latencies.items():
                 _message_to_send += f"\n{api_base}: {round(latency,2)}s"
             _message_to_send = "```" + _message_to_send + "```"
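The `sorted(...)`/`dict(...)` round-trip works because Python dicts preserve insertion order (3.7+), so the alert now lists deployments fastest-first. For example (hostnames are made up):

```python
latencies = {"https://a.example": 2.31, "https://b.example": 0.42, "https://c.example": 1.07}
ordered = dict(sorted(latencies.items(), key=lambda x: x[1]))
# ordered == {"https://b.example": 0.42, "https://c.example": 1.07, "https://a.example": 2.31}
```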
@@ -194,7 +244,7 @@ class SlackAlerting:
             if time_difference_float > self.alerting_threshold:
                 if "langfuse" in litellm.success_callback:
                     request_info = self._add_langfuse_trace_id_to_alert(
-                        request_info=request_info, kwargs=kwargs
+                        request_info=request_info, kwargs=kwargs, type="slow_response"
                     )
                 # add deployment latencies to alert
                 if (
@@ -222,8 +272,8 @@ class SlackAlerting:
     async def response_taking_too_long(
         self,
-        start_time: Optional[float] = None,
-        end_time: Optional[float] = None,
+        start_time: Optional[datetime.datetime] = None,
+        end_time: Optional[datetime.datetime] = None,
         type: Literal["hanging_request", "slow_response"] = "hanging_request",
         request_data: Optional[dict] = None,
     ):
@@ -243,10 +293,6 @@ class SlackAlerting:
             except:
                 messages = ""
             request_info = f"\nRequest Model: `{model}`\nMessages: `{messages}`"
-            if "langfuse" in litellm.success_callback:
-                request_info = self._add_langfuse_trace_id_to_alert(
-                    request_info=request_info, request_data=request_data
-                )
         else:
             request_info = ""
@@ -288,6 +334,15 @@ class SlackAlerting:
                 f"`Requests are hanging - {self.alerting_threshold}s+ request time`"
             )

+            if "langfuse" in litellm.success_callback:
+                request_info = self._add_langfuse_trace_id_to_alert(
+                    request_info=request_info,
+                    request_data=request_data,
+                    type="hanging_request",
+                    start_time=start_time,
+                    end_time=end_time,
+                )
+
             # add deployment latencies to alert
             _deployment_latency_map = self._get_deployment_latencies_to_alert(
                 metadata=request_data.get("metadata", {})

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1 +1 @@
(Minified single-line Next.js HTML for the LiteLLM Dashboard, old vs. new; the visible changes are the page chunk, "page-781ca5f151d78d1d.js" -> "page-508c39694bd40fe9.js", and the buildId, "PtTtxXIYvdjQsvRgdITlk" -> "kbGdRQFfI6W3bEwfzmJDI".)


@@ -1,7 +1,7 @@
(Minified RSC/flight payload for the dashboard; the changes are the page chunk, "page-781ca5f151d78d1d.js" -> "page-508c39694bd40fe9.js", and the buildId, "PtTtxXIYvdjQsvRgdITlk" -> "kbGdRQFfI6W3bEwfzmJDI". The remaining module references and metadata entries are unchanged.)


@@ -4,5 +4,20 @@ model_list:
     api_key: my-fake-key
     model: openai/my-fake-model
   model_name: fake-openai-endpoint
+- litellm_params:
+    api_base: http://0.0.0.0:8080
+    api_key: my-fake-key
+    model: openai/my-fake-model-2
+  model_name: fake-openai-endpoint
+- litellm_params:
+    api_base: http://0.0.0.0:8080
+    api_key: my-fake-key
+    model: openai/my-fake-model-3
+  model_name: fake-openai-endpoint
+- litellm_params:
+    api_base: http://0.0.0.0:8080
+    api_key: my-fake-key
+    model: openai/my-fake-model-4
+  model_name: fake-openai-endpoint

 router_settings:
   num_retries: 0
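All four new entries reuse the `model_name` `fake-openai-endpoint`, which is how the proxy defines a load-balanced group: requests addressed to that alias are spread across every deployment sharing it. The equivalent programmatic setup, as a sketch using the same fake values:

```python
from litellm import Router

model_list = [
    {
        "model_name": "fake-openai-endpoint",  # shared alias => one routing group
        "litellm_params": {
            "model": f"openai/my-fake-model-{i}",
            "api_key": "my-fake-key",
            "api_base": "http://0.0.0.0:8080",
        },
    }
    for i in range(1, 5)
]

router = Router(model_list=model_list, num_retries=0)
# router.completion(model="fake-openai-endpoint", messages=[...]) now picks
# one of the four deployments per call, per the configured routing strategy.
```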


@@ -95,7 +95,15 @@ def common_checks(
             f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}"
         )
     # 7. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
-    if litellm.max_budget > 0 and global_proxy_spend is not None:
+    if (
+        litellm.max_budget > 0
+        and global_proxy_spend is not None
+        # only run global budget checks for OpenAI routes
+        # Reason - the Admin UI should continue working if the proxy crosses it's global budget
+        and route in LiteLLMRoutes.openai_routes.value
+        and route != "/v1/models"
+        and route != "/models"
+    ):
         if global_proxy_spend > litellm.max_budget:
             raise Exception(
                 f"ExceededBudget: LiteLLM Proxy has exceeded its budget. Current spend: {global_proxy_spend}; Max Budget: {litellm.max_budget}"


@@ -72,7 +72,9 @@ class Router:
         ## RELIABILITY ##
         num_retries: Optional[int] = None,
         timeout: Optional[float] = None,
-        default_litellm_params={},  # default params for Router.chat.completion.create
+        default_litellm_params: Optional[
+            dict
+        ] = None,  # default params for Router.chat.completion.create
         default_max_parallel_requests: Optional[int] = None,
         set_verbose: bool = False,
         debug_level: Literal["DEBUG", "INFO"] = "INFO",
@@ -158,6 +160,7 @@ class Router:
         router = Router(model_list=model_list, fallbacks=[{"azure-gpt-3.5-turbo": "openai-gpt-3.5-turbo"}])
         ```
         """
+
         if semaphore:
             self.semaphore = semaphore
         self.set_verbose = set_verbose
@@ -260,6 +263,7 @@ class Router:
         )  # dict to store aliases for router, ex. {"gpt-4": "gpt-3.5-turbo"}, all requests with gpt-4 -> get routed to gpt-3.5-turbo group

         # make Router.chat.completions.create compatible for openai.chat.completions.create
+        default_litellm_params = default_litellm_params or {}
         self.chat = litellm.Chat(params=default_litellm_params, router_obj=self)

         # default litellm args
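Together, these two hunks fix a classic Python trap: `default_litellm_params={}` creates one dict at function-definition time that every `Router` instance would then share and mutate. Moving to `None` plus `default_litellm_params or {}` gives each instance a fresh dict. A minimal demonstration of the pitfall:

```python
def bad(params={}):  # the {} is created once and shared across all calls
    params["seen"] = True
    return params


def good(params=None):  # fresh dict per call
    params = params or {}
    params["seen"] = True
    return params


assert bad() is bad()        # same object: state leaks between calls
assert good() is not good()  # independent objects each call
```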
@@ -475,6 +479,7 @@ class Router:
             )
             kwargs["model_info"] = deployment.get("model_info", {})
             data = deployment["litellm_params"].copy()
+
             model_name = data["model"]
             for k, v in self.default_litellm_params.items():
                 if (
@@ -1453,6 +1458,7 @@ class Router:
                     await asyncio.sleep(timeout)
                 elif RouterErrors.user_defined_ratelimit_error.value in str(e):
                     raise e  # don't wait to retry if deployment hits user-defined rate-limit
+
                 elif hasattr(original_exception, "status_code") and litellm._should_retry(
                     status_code=original_exception.status_code
                 ):
@@ -1614,6 +1620,28 @@ class Router:
                     raise e
         raise original_exception

+    def _router_should_retry(
+        self, e: Exception, remaining_retries: int, num_retries: int
+    ):
+        """
+        Calculate back-off, then retry
+        """
+        if hasattr(e, "response") and hasattr(e.response, "headers"):
+            timeout = litellm._calculate_retry_after(
+                remaining_retries=remaining_retries,
+                max_retries=num_retries,
+                response_headers=e.response.headers,
+                min_timeout=self.retry_after,
+            )
+            time.sleep(timeout)
+        else:
+            timeout = litellm._calculate_retry_after(
+                remaining_retries=remaining_retries,
+                max_retries=num_retries,
+                min_timeout=self.retry_after,
+            )
+            time.sleep(timeout)
+
     def function_with_retries(self, *args, **kwargs):
         """
         Try calling the model 3 times. Shuffle between available deployments.
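The new `_router_should_retry` helper centralizes the back-off decision: when the exception carries an HTTP response, `litellm._calculate_retry_after` can honor the server's `Retry-After` header; otherwise it computes a delay floored at `self.retry_after`. A self-contained sketch of the same idea (the exponential fallback and the numeric `Retry-After` parsing are assumptions, not litellm's exact internals):

```python
import time


def sleep_before_retry(
    e: Exception, remaining_retries: int, max_retries: int, min_timeout: float = 0.5
) -> None:
    attempt = max_retries - remaining_retries
    headers = getattr(getattr(e, "response", None), "headers", None)
    retry_after = headers.get("retry-after") if headers else None
    if retry_after is not None:
        timeout = float(retry_after)  # assume seconds, not an HTTP-date
    else:
        timeout = min_timeout * (2 ** attempt)  # exponential fallback
    time.sleep(max(timeout, min_timeout))
```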
@@ -1633,9 +1661,6 @@ class Router:
                 return response
             except Exception as e:
                 original_exception = e
-                verbose_router_logger.debug(
-                    f"num retries in function with retries: {num_retries}"
-                )
                 ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR
                 if (
                     isinstance(original_exception, litellm.ContextWindowExceededError)
@@ -1649,6 +1674,11 @@ class Router:
                 if num_retries > 0:
                     kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
                 ### RETRY
+                self._router_should_retry(
+                    e=original_exception,
+                    remaining_retries=num_retries,
+                    num_retries=num_retries,
+                )
                 for current_attempt in range(num_retries):
                     verbose_router_logger.debug(
                         f"retrying request. Current attempt - {current_attempt}; retries left: {num_retries}"
@@ -1662,34 +1692,11 @@ class Router:
                         ## LOGGING
                         kwargs = self.log_retry(kwargs=kwargs, e=e)
                         remaining_retries = num_retries - current_attempt
-                        if "No models available" in str(e):
-                            timeout = litellm._calculate_retry_after(
-                                remaining_retries=remaining_retries,
-                                max_retries=num_retries,
-                                min_timeout=self.retry_after,
-                            )
-                            time.sleep(timeout)
-                        elif (
-                            hasattr(e, "status_code")
-                            and hasattr(e, "response")
-                            and litellm._should_retry(status_code=e.status_code)
-                        ):
-                            if hasattr(e.response, "headers"):
-                                timeout = litellm._calculate_retry_after(
-                                    remaining_retries=remaining_retries,
-                                    max_retries=num_retries,
-                                    response_headers=e.response.headers,
-                                    min_timeout=self.retry_after,
-                                )
-                            else:
-                                timeout = litellm._calculate_retry_after(
-                                    remaining_retries=remaining_retries,
-                                    max_retries=num_retries,
-                                    min_timeout=self.retry_after,
-                                )
-                            time.sleep(timeout)
-                        else:
-                            raise e
+                        self._router_should_retry(
+                            e=e,
+                            remaining_retries=remaining_retries,
+                            num_retries=num_retries,
+                        )
             raise original_exception

     ### HELPER FUNCTIONS ###
@@ -1987,6 +1994,8 @@ class Router:
             # check if it ends with a trailing slash
             if api_base.endswith("/"):
                 api_base += "v1/"
+            elif api_base.endswith("/v1"):
+                api_base += "/"
             else:
                 api_base += "/v1/"
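The new `elif` closes a normalization gap: an `api_base` already ending in `/v1` previously fell into the `else` branch and picked up a second `/v1/` (the `assert uri_reference.count("v1") == 1` added to the Azure AI Studio test below guards exactly this). The full normalization, pulled out as a hypothetical helper with one extra guard for already-normalized input:

```python
def ensure_v1_suffix(api_base: str) -> str:
    """Hypothetical: normalize any spelling to a single trailing '/v1/'."""
    if api_base.endswith("/v1/"):
        return api_base       # already normalized (extra guard, not in the diff)
    if api_base.endswith("/"):
        return api_base + "v1/"
    if api_base.endswith("/v1"):
        return api_base + "/"  # the case this commit fixes
    return api_base + "/v1/"


assert ensure_v1_suffix("https://host.example/v1") == "https://host.example/v1/"
assert ensure_v1_suffix("https://host.example/") == "https://host.example/v1/"
assert ensure_v1_suffix("https://host.example") == "https://host.example/v1/"
```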


@@ -119,7 +119,9 @@ def test_multiple_deployments_parallel():
 # test_multiple_deployments_parallel()

-def test_cooldown_same_model_name():
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_cooldown_same_model_name(sync_mode):
     # users could have the same model with different api_base
     # example
     # azure/chatgpt, api_base: 1234
@@ -161,6 +163,7 @@ async def test_cooldown_same_model_name(sync_mode):
             num_retries=3,
         )  # type: ignore

+        if sync_mode:
             response = router.completion(
                 model="gpt-3.5-turbo",
                 messages=[{"role": "user", "content": "hello this request will pass"}],
@@ -176,6 +179,23 @@ async def test_cooldown_same_model_name(sync_mode):
                 model_ids[0] != model_ids[1]
             )  # ensure both models have a uuid added, and they have different names
+
+            print("\ngot response\n", response)
+        else:
+            response = await router.acompletion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "hello this request will pass"}],
+            )
+            print(router.model_list)
+            model_ids = []
+            for model in router.model_list:
+                model_ids.append(model["model_info"]["id"])
+            print("\n litellm model ids ", model_ids)
+
+            # example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
+            assert (
+                model_ids[0] != model_ids[1]
+            )  # ensure both models have a uuid added, and they have different names

         print("\ngot response\n", response)
     except Exception as e:
         pytest.fail(f"Got unexpected exception on router! - {e}")


@@ -483,6 +483,8 @@ def test_mistral_embeddings():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")


+@pytest.mark.skip(reason="local test")
 def test_watsonx_embeddings():
     try:
         litellm.set_verbose = True


@@ -201,6 +201,7 @@ async def test_router_atext_completion_streaming():
 @pytest.mark.asyncio
 async def test_router_completion_streaming():
+    litellm.set_verbose = True
     messages = [
         {"role": "user", "content": "Hello, can you generate a 500 words poem?"}
     ]
@@ -219,9 +220,9 @@ async def test_router_completion_streaming():
         {
             "model_name": "azure-model",
             "litellm_params": {
-                "model": "azure/gpt-35-turbo",
-                "api_key": "os.environ/AZURE_EUROPE_API_KEY",
-                "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com",
+                "model": "azure/gpt-turbo",
+                "api_key": "os.environ/AZURE_FRANCE_API_KEY",
+                "api_base": "https://openai-france-1234.openai.azure.com",
                 "rpm": 6,
             },
             "model_info": {"id": 2},
@@ -229,9 +230,9 @@ async def test_router_completion_streaming():
         {
             "model_name": "azure-model",
             "litellm_params": {
-                "model": "azure/gpt-35-turbo",
-                "api_key": "os.environ/AZURE_CANADA_API_KEY",
-                "api_base": "https://my-endpoint-canada-berri992.openai.azure.com",
+                "model": "azure/gpt-turbo",
+                "api_key": "os.environ/AZURE_FRANCE_API_KEY",
+                "api_base": "https://openai-france-1234.openai.azure.com",
                 "rpm": 6,
             },
             "model_info": {"id": 3},
@@ -262,4 +263,4 @@ async def test_router_completion_streaming():
     ## check if calls equally distributed
     cache_dict = router.cache.get_cache(key=cache_key)
     for k, v in cache_dict.items():
-        assert v == 1
+        assert v == 1, f"Failed. K={k} called v={v} times, cache_dict={cache_dict}"


@@ -57,6 +57,7 @@ def test_router_num_retries_init(num_retries, max_retries):
     else:
         assert getattr(model_client, "max_retries") == 0

+
 @pytest.mark.parametrize(
     "timeout", [10, 1.0, httpx.Timeout(timeout=300.0, connect=20.0)]
 )
@@ -137,6 +138,7 @@ def test_router_azure_ai_studio_init(mistral_api_base):
     print(f"uri_reference: {uri_reference}")

     assert "/v1/" in uri_reference
+    assert uri_reference.count("v1") == 1


 def test_exception_raising():
def test_exception_raising(): def test_exception_raising():


@@ -203,7 +203,7 @@ def test_timeouts_router():
             },
         },
     ]
-    router = Router(model_list=model_list)
+    router = Router(model_list=model_list, num_retries=0)

     print("PASSED !")
@@ -396,7 +396,9 @@ def test_router_init_gpt_4_vision_enhancements():
         pytest.fail(f"Error occurred: {e}")

-def test_openai_with_organization():
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_openai_with_organization(sync_mode):
     try:
         print("Testing OpenAI with organization")
         model_list = [
@@ -418,6 +420,7 @@ async def test_openai_with_organization(sync_mode):
         print(router.model_list)
         print(router.model_list[0])

+        if sync_mode:
             openai_client = router._get_client(
                 deployment=router.model_list[0],
                 kwargs={"input": ["hello"], "model": "openai-bad-org"},
@@ -433,7 +436,9 @@ async def test_openai_with_organization(sync_mode):
                     model="openai-bad-org",
                     messages=[{"role": "user", "content": "this is a test"}],
                 )
-                pytest.fail("Request should have failed - This organization does not exist")
+                pytest.fail(
+                    "Request should have failed - This organization does not exist"
+                )
             except Exception as e:
                 print("Got exception: " + str(e))
                 assert "No such organization: org-ikDc4ex8NB" in str(e)
@@ -444,6 +449,36 @@ async def test_openai_with_organization(sync_mode):
                 messages=[{"role": "user", "content": "this is a test"}],
                 max_tokens=5,
             )
+        else:
+            openai_client = router._get_client(
+                deployment=router.model_list[0],
+                kwargs={"input": ["hello"], "model": "openai-bad-org"},
+                client_type="async",
+            )
+            print(vars(openai_client))
+            assert openai_client.organization == "org-ikDc4ex8NB"
+
+            # bad org raises error
+            try:
+                response = await router.acompletion(
+                    model="openai-bad-org",
+                    messages=[{"role": "user", "content": "this is a test"}],
+                )
+                pytest.fail(
+                    "Request should have failed - This organization does not exist"
+                )
+            except Exception as e:
+                print("Got exception: " + str(e))
+                assert "No such organization: org-ikDc4ex8NB" in str(e)
+
+            # good org works
+            response = await router.acompletion(
+                model="openai-good-org",
+                messages=[{"role": "user", "content": "this is a test"}],
+                max_tokens=5,
+            )
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")


@@ -57,6 +57,7 @@ def test_router_timeouts():
         redis_password=os.getenv("REDIS_PASSWORD"),
         redis_port=int(os.getenv("REDIS_PORT")),
         timeout=10,
+        num_retries=0,
     )

     print("***** TPM SETTINGS *****")
@@ -89,15 +90,15 @@ def test_router_timeouts():
 @pytest.mark.asyncio
 async def test_router_timeouts_bedrock():
-    import openai
+    import openai, uuid

     # Model list for OpenAI and Anthropic models
-    model_list = [
+    _model_list = [
         {
             "model_name": "bedrock",
             "litellm_params": {
                 "model": "bedrock/anthropic.claude-instant-v1",
-                "timeout": 0.001,
+                "timeout": 0.00001,
             },
             "tpm": 80000,
         },
@@ -105,17 +106,18 @@ async def test_router_timeouts_bedrock():
     # Configure router
     router = Router(
-        model_list=model_list,
+        model_list=_model_list,
         routing_strategy="usage-based-routing",
         debug_level="DEBUG",
         set_verbose=True,
+        num_retries=0,
     )

     litellm.set_verbose = True
     try:
         response = await router.acompletion(
             model="bedrock",
-            messages=[{"role": "user", "content": "hello, who are u"}],
+            messages=[{"role": "user", "content": f"hello, who are u {uuid.uuid4()}"}],
         )
         print(response)
         pytest.fail("Did not raise error `openai.APITimeoutError`")


@@ -5488,11 +5488,15 @@ def get_optional_params(
         if "decoding_method" in passed_params:
             extra_body["decoding_method"] = passed_params.pop("decoding_method")
         if "min_tokens" in passed_params or "min_new_tokens" in passed_params:
-            extra_body["min_new_tokens"] = passed_params.pop("min_tokens", passed_params.pop("min_new_tokens"))
+            extra_body["min_new_tokens"] = passed_params.pop(
+                "min_tokens", passed_params.pop("min_new_tokens")
+            )
         if "top_k" in passed_params:
             extra_body["top_k"] = passed_params.pop("top_k")
         if "truncate_input_tokens" in passed_params:
-            extra_body["truncate_input_tokens"] = passed_params.pop("truncate_input_tokens")
+            extra_body["truncate_input_tokens"] = passed_params.pop(
+                "truncate_input_tokens"
+            )
         if "length_penalty" in passed_params:
             extra_body["length_penalty"] = passed_params.pop("length_penalty")
         if "time_limit" in passed_params:
@@ -9837,19 +9841,21 @@ class CustomStreamWrapper:
             elif isinstance(chunk, (str, bytes)):
                 if isinstance(chunk, bytes):
                     chunk = chunk.decode("utf-8")
-                if 'generated_text' in chunk:
-                    response = chunk.replace('data: ', '').strip()
+                if "generated_text" in chunk:
+                    response = chunk.replace("data: ", "").strip()
                     parsed_response = json.loads(response)
                 else:
                     return {"text": "", "is_finished": False}
             else:
                 print_verbose(f"chunk: {chunk} (Type: {type(chunk)})")
-                raise ValueError(f"Unable to parse response. Original response: {chunk}")
+                raise ValueError(
+                    f"Unable to parse response. Original response: {chunk}"
+                )
             results = parsed_response.get("results", [])
             if len(results) > 0:
                 text = results[0].get("generated_text", "")
                 finish_reason = results[0].get("stop_reason")
-                is_finished = finish_reason != 'not_finished'
+                is_finished = finish_reason != "not_finished"
                 return {
                     "text": text,
                     "is_finished": is_finished,
@@ -10119,16 +10125,6 @@ class CustomStreamWrapper:
             elif self.custom_llm_provider == "watsonx":
                 response_obj = self.handle_watsonx_stream(chunk)
                 completion_obj["content"] = response_obj["text"]
-                print_verbose(f"completion obj content: {completion_obj['content']}")
-                if response_obj.get("prompt_tokens") is not None:
-                    prompt_token_count = getattr(model_response.usage, "prompt_tokens", 0)
-                    model_response.usage.prompt_tokens = (prompt_token_count + response_obj["prompt_tokens"])
-                if response_obj.get("completion_tokens") is not None:
-                    model_response.usage.completion_tokens = response_obj["completion_tokens"]
-                    model_response.usage.total_tokens = (
-                        getattr(model_response.usage, "prompt_tokens", 0)
-                        + getattr(model_response.usage, "completion_tokens", 0)
-                    )
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider == "text-completion-openai":


@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.35.30"
+version = "1.35.31"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.35.30"
+version = "1.35.31"
 version_files = [
     "pyproject.toml:^version"
 ]

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1 +1 @@
(Minified single-line Next.js HTML for the LiteLLM Dashboard, old vs. new; the visible changes are the page chunk, "page-781ca5f151d78d1d.js" -> "page-508c39694bd40fe9.js", and the buildId, "PtTtxXIYvdjQsvRgdITlk" -> "kbGdRQFfI6W3bEwfzmJDI".)


@@ -1,7 +1,7 @@
(Minified RSC/flight payload for the dashboard; the changes are the page chunk, "page-781ca5f151d78d1d.js" -> "page-508c39694bd40fe9.js", and the buildId, "PtTtxXIYvdjQsvRgdITlk" -> "kbGdRQFfI6W3bEwfzmJDI". The remaining module references and metadata entries are unchanged.)


@@ -39,6 +39,7 @@ const CreateKey: React.FC<CreateKeyProps> = ({
   const [apiKey, setApiKey] = useState(null);
   const [softBudget, setSoftBudget] = useState(null);
   const [userModels, setUserModels] = useState([]);
+  const [modelsToPick, setModelsToPick] = useState([]);
   const handleOk = () => {
     setIsModalVisible(false);
     form.resetFields();
@@ -95,6 +96,30 @@ const CreateKey: React.FC<CreateKeyProps> = ({
     message.success('API Key copied to clipboard');
   };

+  useEffect(() => {
+    let tempModelsToPick = [];
+
+    if (team) {
+      if (team.models.length > 0) {
+        if (team.models.includes("all-proxy-models")) {
+          // if the team has all-proxy-models show all available models
+          tempModelsToPick = userModels;
+        } else {
+          // show team models
+          tempModelsToPick = team.models;
+        }
+      } else {
+        // show all available models if the team has no models set
+        tempModelsToPick = userModels;
+      }
+    } else {
+      // no team set, show all available models
+      tempModelsToPick = userModels;
+    }
+
+    setModelsToPick(tempModelsToPick);
+  }, [team, userModels]);
+
   return (
     <div>
@@ -161,30 +186,15 @@ const CreateKey: React.FC<CreateKeyProps> = ({
                 <Option key="all-team-models" value="all-team-models">
                   All Team Models
                 </Option>
-                {team && team.models ? (
-                  team.models.includes("all-proxy-models") ? (
-                    userModels.map((model: string) => (
-                      (
-                        <Option key={model} value={model}>
-                          {model}
-                        </Option>
-                      )
-                    ))
-                  ) : (
-                    team.models.map((model: string) => (
-                      <Option key={model} value={model}>
-                        {model}
-                      </Option>
-                    ))
-                  )
-                ) : (
-                  userModels.map((model: string) => (
-                    <Option key={model} value={model}>
-                      {model}
-                    </Option>
-                  ))
-                )}
+                {
+                  modelsToPick.map((model: string) => (
+                    (
+                      <Option key={model} value={model}>
+                        {model}
+                      </Option>
+                    )
+                  ))
+                }
               </Select>
             </Form.Item>
             <Accordion className="mt-20 mb-8" >