forked from phoenix/litellm-mirror
Merge branch 'BerriAI:main' into main
This commit is contained in:
commit
82be9a7e67
30 changed files with 341 additions and 183 deletions
|
@ -1,13 +1,13 @@
|
|||
# Slack Alerting
|
||||
# 🚨 Alerting
|
||||
|
||||
Get alerts for:
|
||||
- hanging LLM api calls
|
||||
- failed LLM api calls
|
||||
- slow LLM api calls
|
||||
- budget Tracking per key/user:
|
||||
- Hanging LLM api calls
|
||||
- Failed LLM api calls
|
||||
- Slow LLM api calls
|
||||
- Budget Tracking per key/user:
|
||||
- When a User/Key crosses their Budget
|
||||
- When a User/Key is 15% away from crossing their Budget
|
||||
- failed db read/writes
|
||||
- Failed db read/writes
|
||||
|
||||
## Quick Start
|
||||
|
||||
|
|
|
@ -43,6 +43,12 @@ const sidebars = {
|
|||
"proxy/user_keys",
|
||||
"proxy/enterprise",
|
||||
"proxy/virtual_keys",
|
||||
"proxy/alerting",
|
||||
{
|
||||
type: "category",
|
||||
label: "Logging",
|
||||
items: ["proxy/logging", "proxy/streaming_logging"],
|
||||
},
|
||||
"proxy/team_based_routing",
|
||||
"proxy/ui",
|
||||
"proxy/cost_tracking",
|
||||
|
@ -58,11 +64,6 @@ const sidebars = {
|
|||
"proxy/pii_masking",
|
||||
"proxy/prompt_injection",
|
||||
"proxy/caching",
|
||||
{
|
||||
type: "category",
|
||||
label: "Logging, Alerting",
|
||||
items: ["proxy/logging", "proxy/alerting", "proxy/streaming_logging"],
|
||||
},
|
||||
"proxy/prometheus",
|
||||
"proxy/call_hooks",
|
||||
"proxy/rules",
|
||||
|
|
|
@ -12,7 +12,9 @@ import litellm
|
|||
|
||||
class LangFuseLogger:
|
||||
# Class variables or attributes
|
||||
def __init__(self, langfuse_public_key=None, langfuse_secret=None):
|
||||
def __init__(
|
||||
self, langfuse_public_key=None, langfuse_secret=None, flush_interval=1
|
||||
):
|
||||
try:
|
||||
from langfuse import Langfuse
|
||||
except Exception as e:
|
||||
|
@ -31,7 +33,7 @@ class LangFuseLogger:
|
|||
host=self.langfuse_host,
|
||||
release=self.langfuse_release,
|
||||
debug=self.langfuse_debug,
|
||||
flush_interval=1, # flush interval in seconds
|
||||
flush_interval=flush_interval, # flush interval in seconds
|
||||
)
|
||||
|
||||
# set the current langfuse project id in the environ
|
||||
|
|
|
@ -12,6 +12,7 @@ from litellm.caching import DualCache
|
|||
import asyncio
|
||||
import aiohttp
|
||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
|
||||
import datetime
|
||||
|
||||
|
||||
class SlackAlerting:
|
||||
|
@ -47,6 +48,18 @@ class SlackAlerting:
|
|||
self.internal_usage_cache = DualCache()
|
||||
self.async_http_handler = AsyncHTTPHandler()
|
||||
self.alert_to_webhook_url = alert_to_webhook_url
|
||||
self.langfuse_logger = None
|
||||
|
||||
try:
|
||||
from litellm.integrations.langfuse import LangFuseLogger
|
||||
|
||||
self.langfuse_logger = LangFuseLogger(
|
||||
os.getenv("LANGFUSE_PUBLIC_KEY"),
|
||||
os.getenv("LANGFUSE_SECRET_KEY"),
|
||||
flush_interval=1,
|
||||
)
|
||||
except:
|
||||
pass
|
||||
|
||||
pass
|
||||
|
||||
|
@ -93,39 +106,68 @@ class SlackAlerting:
|
|||
request_info: str,
|
||||
request_data: Optional[dict] = None,
|
||||
kwargs: Optional[dict] = None,
|
||||
type: Literal["hanging_request", "slow_response"] = "hanging_request",
|
||||
start_time: Optional[datetime.datetime] = None,
|
||||
end_time: Optional[datetime.datetime] = None,
|
||||
):
|
||||
import uuid
|
||||
|
||||
# For now: do nothing as we're debugging why this is not working as expected
|
||||
if request_data is not None:
|
||||
trace_id = request_data.get("metadata", {}).get(
|
||||
"trace_id", None
|
||||
) # get langfuse trace id
|
||||
if trace_id is None:
|
||||
trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
|
||||
request_data["metadata"]["trace_id"] = trace_id
|
||||
elif kwargs is not None:
|
||||
_litellm_params = kwargs.get("litellm_params", {})
|
||||
trace_id = _litellm_params.get("metadata", {}).get(
|
||||
"trace_id", None
|
||||
) # get langfuse trace id
|
||||
if trace_id is None:
|
||||
trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
|
||||
_litellm_params["metadata"]["trace_id"] = trace_id
|
||||
|
||||
# Log hanging request as an error on langfuse
|
||||
if type == "hanging_request":
|
||||
if self.langfuse_logger is not None:
|
||||
_logging_kwargs = copy.deepcopy(request_data)
|
||||
if _logging_kwargs is None:
|
||||
_logging_kwargs = {}
|
||||
_logging_kwargs["litellm_params"] = {}
|
||||
request_data = request_data or {}
|
||||
_logging_kwargs["litellm_params"]["metadata"] = request_data.get(
|
||||
"metadata", {}
|
||||
)
|
||||
# log to langfuse in a separate thread
|
||||
import threading
|
||||
|
||||
threading.Thread(
|
||||
target=self.langfuse_logger.log_event,
|
||||
args=(
|
||||
_logging_kwargs,
|
||||
None,
|
||||
start_time,
|
||||
end_time,
|
||||
None,
|
||||
print,
|
||||
"ERROR",
|
||||
"Requests is hanging",
|
||||
),
|
||||
).start()
|
||||
|
||||
_langfuse_host = os.environ.get("LANGFUSE_HOST", "https://cloud.langfuse.com")
|
||||
_langfuse_project_id = os.environ.get("LANGFUSE_PROJECT_ID")
|
||||
|
||||
# langfuse urls look like: https://us.cloud.langfuse.com/project/************/traces/litellm-alert-trace-ididi9dk-09292-************
|
||||
|
||||
_langfuse_url = (
|
||||
f"{_langfuse_host}/project/{_langfuse_project_id}/traces/{trace_id}"
|
||||
)
|
||||
request_info += f"\n🪢 Langfuse Trace: {_langfuse_url}"
|
||||
return request_info
|
||||
|
||||
# if request_data is not None:
|
||||
# trace_id = request_data.get("metadata", {}).get(
|
||||
# "trace_id", None
|
||||
# ) # get langfuse trace id
|
||||
# if trace_id is None:
|
||||
# trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
|
||||
# request_data["metadata"]["trace_id"] = trace_id
|
||||
# elif kwargs is not None:
|
||||
# _litellm_params = kwargs.get("litellm_params", {})
|
||||
# trace_id = _litellm_params.get("metadata", {}).get(
|
||||
# "trace_id", None
|
||||
# ) # get langfuse trace id
|
||||
# if trace_id is None:
|
||||
# trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
|
||||
# _litellm_params["metadata"]["trace_id"] = trace_id
|
||||
|
||||
# _langfuse_host = os.environ.get("LANGFUSE_HOST", "https://cloud.langfuse.com")
|
||||
# _langfuse_project_id = os.environ.get("LANGFUSE_PROJECT_ID")
|
||||
|
||||
# # langfuse urls look like: https://us.cloud.langfuse.com/project/************/traces/litellm-alert-trace-ididi9dk-09292-************
|
||||
|
||||
# _langfuse_url = (
|
||||
# f"{_langfuse_host}/project/{_langfuse_project_id}/traces/{trace_id}"
|
||||
# )
|
||||
# request_info += f"\n🪢 Langfuse Trace: {_langfuse_url}"
|
||||
# return request_info
|
||||
|
||||
def _response_taking_too_long_callback(
|
||||
self,
|
||||
kwargs, # kwargs to completion
|
||||
|
@ -167,6 +209,14 @@ class SlackAlerting:
|
|||
_deployment_latencies = metadata["_latency_per_deployment"]
|
||||
if len(_deployment_latencies) == 0:
|
||||
return None
|
||||
try:
|
||||
# try sorting deployments by latency
|
||||
_deployment_latencies = sorted(
|
||||
_deployment_latencies.items(), key=lambda x: x[1]
|
||||
)
|
||||
_deployment_latencies = dict(_deployment_latencies)
|
||||
except:
|
||||
pass
|
||||
for api_base, latency in _deployment_latencies.items():
|
||||
_message_to_send += f"\n{api_base}: {round(latency,2)}s"
|
||||
_message_to_send = "```" + _message_to_send + "```"
|
||||
|
@ -194,7 +244,7 @@ class SlackAlerting:
|
|||
if time_difference_float > self.alerting_threshold:
|
||||
if "langfuse" in litellm.success_callback:
|
||||
request_info = self._add_langfuse_trace_id_to_alert(
|
||||
request_info=request_info, kwargs=kwargs
|
||||
request_info=request_info, kwargs=kwargs, type="slow_response"
|
||||
)
|
||||
# add deployment latencies to alert
|
||||
if (
|
||||
|
@ -222,8 +272,8 @@ class SlackAlerting:
|
|||
|
||||
async def response_taking_too_long(
|
||||
self,
|
||||
start_time: Optional[float] = None,
|
||||
end_time: Optional[float] = None,
|
||||
start_time: Optional[datetime.datetime] = None,
|
||||
end_time: Optional[datetime.datetime] = None,
|
||||
type: Literal["hanging_request", "slow_response"] = "hanging_request",
|
||||
request_data: Optional[dict] = None,
|
||||
):
|
||||
|
@ -243,10 +293,6 @@ class SlackAlerting:
|
|||
except:
|
||||
messages = ""
|
||||
request_info = f"\nRequest Model: `{model}`\nMessages: `{messages}`"
|
||||
if "langfuse" in litellm.success_callback:
|
||||
request_info = self._add_langfuse_trace_id_to_alert(
|
||||
request_info=request_info, request_data=request_data
|
||||
)
|
||||
else:
|
||||
request_info = ""
|
||||
|
||||
|
@ -288,6 +334,15 @@ class SlackAlerting:
|
|||
f"`Requests are hanging - {self.alerting_threshold}s+ request time`"
|
||||
)
|
||||
|
||||
if "langfuse" in litellm.success_callback:
|
||||
request_info = self._add_langfuse_trace_id_to_alert(
|
||||
request_info=request_info,
|
||||
request_data=request_data,
|
||||
type="hanging_request",
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
)
|
||||
|
||||
# add deployment latencies to alert
|
||||
_deployment_latency_map = self._get_deployment_latencies_to_alert(
|
||||
metadata=request_data.get("metadata", {})
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[27125,[\"447\",\"static/chunks/447-9f8d32190ff7d16d.js\",\"931\",\"static/chunks/app/page-781ca5f151d78d1d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"PtTtxXIYvdjQsvRgdITlk\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[27125,[\"447\",\"static/chunks/447-9f8d32190ff7d16d.js\",\"931\",\"static/chunks/app/page-508c39694bd40fe9.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"kbGdRQFfI6W3bEwfzmJDI\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-781ca5f151d78d1d.js"],""]
|
||||
3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-508c39694bd40fe9.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["PtTtxXIYvdjQsvRgdITlk",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["kbGdRQFfI6W3bEwfzmJDI",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -4,5 +4,20 @@ model_list:
|
|||
api_key: my-fake-key
|
||||
model: openai/my-fake-model
|
||||
model_name: fake-openai-endpoint
|
||||
- litellm_params:
|
||||
api_base: http://0.0.0.0:8080
|
||||
api_key: my-fake-key
|
||||
model: openai/my-fake-model-2
|
||||
model_name: fake-openai-endpoint
|
||||
- litellm_params:
|
||||
api_base: http://0.0.0.0:8080
|
||||
api_key: my-fake-key
|
||||
model: openai/my-fake-model-3
|
||||
model_name: fake-openai-endpoint
|
||||
- litellm_params:
|
||||
api_base: http://0.0.0.0:8080
|
||||
api_key: my-fake-key
|
||||
model: openai/my-fake-model-4
|
||||
model_name: fake-openai-endpoint
|
||||
router_settings:
|
||||
num_retries: 0
|
|
@ -95,7 +95,15 @@ def common_checks(
|
|||
f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}"
|
||||
)
|
||||
# 7. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
|
||||
if litellm.max_budget > 0 and global_proxy_spend is not None:
|
||||
if (
|
||||
litellm.max_budget > 0
|
||||
and global_proxy_spend is not None
|
||||
# only run global budget checks for OpenAI routes
|
||||
# Reason - the Admin UI should continue working if the proxy crosses it's global budget
|
||||
and route in LiteLLMRoutes.openai_routes.value
|
||||
and route != "/v1/models"
|
||||
and route != "/models"
|
||||
):
|
||||
if global_proxy_spend > litellm.max_budget:
|
||||
raise Exception(
|
||||
f"ExceededBudget: LiteLLM Proxy has exceeded its budget. Current spend: {global_proxy_spend}; Max Budget: {litellm.max_budget}"
|
||||
|
|
|
@ -72,7 +72,9 @@ class Router:
|
|||
## RELIABILITY ##
|
||||
num_retries: Optional[int] = None,
|
||||
timeout: Optional[float] = None,
|
||||
default_litellm_params={}, # default params for Router.chat.completion.create
|
||||
default_litellm_params: Optional[
|
||||
dict
|
||||
] = None, # default params for Router.chat.completion.create
|
||||
default_max_parallel_requests: Optional[int] = None,
|
||||
set_verbose: bool = False,
|
||||
debug_level: Literal["DEBUG", "INFO"] = "INFO",
|
||||
|
@ -158,6 +160,7 @@ class Router:
|
|||
router = Router(model_list=model_list, fallbacks=[{"azure-gpt-3.5-turbo": "openai-gpt-3.5-turbo"}])
|
||||
```
|
||||
"""
|
||||
|
||||
if semaphore:
|
||||
self.semaphore = semaphore
|
||||
self.set_verbose = set_verbose
|
||||
|
@ -260,6 +263,7 @@ class Router:
|
|||
) # dict to store aliases for router, ex. {"gpt-4": "gpt-3.5-turbo"}, all requests with gpt-4 -> get routed to gpt-3.5-turbo group
|
||||
|
||||
# make Router.chat.completions.create compatible for openai.chat.completions.create
|
||||
default_litellm_params = default_litellm_params or {}
|
||||
self.chat = litellm.Chat(params=default_litellm_params, router_obj=self)
|
||||
|
||||
# default litellm args
|
||||
|
@ -475,6 +479,7 @@ class Router:
|
|||
)
|
||||
kwargs["model_info"] = deployment.get("model_info", {})
|
||||
data = deployment["litellm_params"].copy()
|
||||
|
||||
model_name = data["model"]
|
||||
for k, v in self.default_litellm_params.items():
|
||||
if (
|
||||
|
@ -1453,6 +1458,7 @@ class Router:
|
|||
await asyncio.sleep(timeout)
|
||||
elif RouterErrors.user_defined_ratelimit_error.value in str(e):
|
||||
raise e # don't wait to retry if deployment hits user-defined rate-limit
|
||||
|
||||
elif hasattr(original_exception, "status_code") and litellm._should_retry(
|
||||
status_code=original_exception.status_code
|
||||
):
|
||||
|
@ -1614,6 +1620,28 @@ class Router:
|
|||
raise e
|
||||
raise original_exception
|
||||
|
||||
def _router_should_retry(
|
||||
self, e: Exception, remaining_retries: int, num_retries: int
|
||||
):
|
||||
"""
|
||||
Calculate back-off, then retry
|
||||
"""
|
||||
if hasattr(e, "response") and hasattr(e.response, "headers"):
|
||||
timeout = litellm._calculate_retry_after(
|
||||
remaining_retries=remaining_retries,
|
||||
max_retries=num_retries,
|
||||
response_headers=e.response.headers,
|
||||
min_timeout=self.retry_after,
|
||||
)
|
||||
time.sleep(timeout)
|
||||
else:
|
||||
timeout = litellm._calculate_retry_after(
|
||||
remaining_retries=remaining_retries,
|
||||
max_retries=num_retries,
|
||||
min_timeout=self.retry_after,
|
||||
)
|
||||
time.sleep(timeout)
|
||||
|
||||
def function_with_retries(self, *args, **kwargs):
|
||||
"""
|
||||
Try calling the model 3 times. Shuffle between available deployments.
|
||||
|
@ -1633,9 +1661,6 @@ class Router:
|
|||
return response
|
||||
except Exception as e:
|
||||
original_exception = e
|
||||
verbose_router_logger.debug(
|
||||
f"num retries in function with retries: {num_retries}"
|
||||
)
|
||||
### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR
|
||||
if (
|
||||
isinstance(original_exception, litellm.ContextWindowExceededError)
|
||||
|
@ -1649,6 +1674,11 @@ class Router:
|
|||
if num_retries > 0:
|
||||
kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
|
||||
### RETRY
|
||||
self._router_should_retry(
|
||||
e=original_exception,
|
||||
remaining_retries=num_retries,
|
||||
num_retries=num_retries,
|
||||
)
|
||||
for current_attempt in range(num_retries):
|
||||
verbose_router_logger.debug(
|
||||
f"retrying request. Current attempt - {current_attempt}; retries left: {num_retries}"
|
||||
|
@ -1662,34 +1692,11 @@ class Router:
|
|||
## LOGGING
|
||||
kwargs = self.log_retry(kwargs=kwargs, e=e)
|
||||
remaining_retries = num_retries - current_attempt
|
||||
if "No models available" in str(e):
|
||||
timeout = litellm._calculate_retry_after(
|
||||
self._router_should_retry(
|
||||
e=e,
|
||||
remaining_retries=remaining_retries,
|
||||
max_retries=num_retries,
|
||||
min_timeout=self.retry_after,
|
||||
num_retries=num_retries,
|
||||
)
|
||||
time.sleep(timeout)
|
||||
elif (
|
||||
hasattr(e, "status_code")
|
||||
and hasattr(e, "response")
|
||||
and litellm._should_retry(status_code=e.status_code)
|
||||
):
|
||||
if hasattr(e.response, "headers"):
|
||||
timeout = litellm._calculate_retry_after(
|
||||
remaining_retries=remaining_retries,
|
||||
max_retries=num_retries,
|
||||
response_headers=e.response.headers,
|
||||
min_timeout=self.retry_after,
|
||||
)
|
||||
else:
|
||||
timeout = litellm._calculate_retry_after(
|
||||
remaining_retries=remaining_retries,
|
||||
max_retries=num_retries,
|
||||
min_timeout=self.retry_after,
|
||||
)
|
||||
time.sleep(timeout)
|
||||
else:
|
||||
raise e
|
||||
raise original_exception
|
||||
|
||||
### HELPER FUNCTIONS
|
||||
|
@ -1987,6 +1994,8 @@ class Router:
|
|||
# check if it ends with a trailing slash
|
||||
if api_base.endswith("/"):
|
||||
api_base += "v1/"
|
||||
elif api_base.endswith("/v1"):
|
||||
api_base += "/"
|
||||
else:
|
||||
api_base += "/v1/"
|
||||
|
||||
|
|
|
@ -119,7 +119,9 @@ def test_multiple_deployments_parallel():
|
|||
|
||||
|
||||
# test_multiple_deployments_parallel()
|
||||
def test_cooldown_same_model_name():
|
||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_cooldown_same_model_name(sync_mode):
|
||||
# users could have the same model with different api_base
|
||||
# example
|
||||
# azure/chatgpt, api_base: 1234
|
||||
|
@ -161,6 +163,7 @@ def test_cooldown_same_model_name():
|
|||
num_retries=3,
|
||||
) # type: ignore
|
||||
|
||||
if sync_mode:
|
||||
response = router.completion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[{"role": "user", "content": "hello this request will pass"}],
|
||||
|
@ -176,6 +179,23 @@ def test_cooldown_same_model_name():
|
|||
model_ids[0] != model_ids[1]
|
||||
) # ensure both models have a uuid added, and they have different names
|
||||
|
||||
print("\ngot response\n", response)
|
||||
else:
|
||||
response = await router.acompletion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[{"role": "user", "content": "hello this request will pass"}],
|
||||
)
|
||||
print(router.model_list)
|
||||
model_ids = []
|
||||
for model in router.model_list:
|
||||
model_ids.append(model["model_info"]["id"])
|
||||
print("\n litellm model ids ", model_ids)
|
||||
|
||||
# example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
|
||||
assert (
|
||||
model_ids[0] != model_ids[1]
|
||||
) # ensure both models have a uuid added, and they have different names
|
||||
|
||||
print("\ngot response\n", response)
|
||||
except Exception as e:
|
||||
pytest.fail(f"Got unexpected exception on router! - {e}")
|
||||
|
|
|
@ -483,6 +483,8 @@ def test_mistral_embeddings():
|
|||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="local test")
|
||||
def test_watsonx_embeddings():
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
|
|
|
@ -201,6 +201,7 @@ async def test_router_atext_completion_streaming():
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_router_completion_streaming():
|
||||
litellm.set_verbose = True
|
||||
messages = [
|
||||
{"role": "user", "content": "Hello, can you generate a 500 words poem?"}
|
||||
]
|
||||
|
@ -219,9 +220,9 @@ async def test_router_completion_streaming():
|
|||
{
|
||||
"model_name": "azure-model",
|
||||
"litellm_params": {
|
||||
"model": "azure/gpt-35-turbo",
|
||||
"api_key": "os.environ/AZURE_EUROPE_API_KEY",
|
||||
"api_base": "https://my-endpoint-europe-berri-992.openai.azure.com",
|
||||
"model": "azure/gpt-turbo",
|
||||
"api_key": "os.environ/AZURE_FRANCE_API_KEY",
|
||||
"api_base": "https://openai-france-1234.openai.azure.com",
|
||||
"rpm": 6,
|
||||
},
|
||||
"model_info": {"id": 2},
|
||||
|
@ -229,9 +230,9 @@ async def test_router_completion_streaming():
|
|||
{
|
||||
"model_name": "azure-model",
|
||||
"litellm_params": {
|
||||
"model": "azure/gpt-35-turbo",
|
||||
"api_key": "os.environ/AZURE_CANADA_API_KEY",
|
||||
"api_base": "https://my-endpoint-canada-berri992.openai.azure.com",
|
||||
"model": "azure/gpt-turbo",
|
||||
"api_key": "os.environ/AZURE_FRANCE_API_KEY",
|
||||
"api_base": "https://openai-france-1234.openai.azure.com",
|
||||
"rpm": 6,
|
||||
},
|
||||
"model_info": {"id": 3},
|
||||
|
@ -262,4 +263,4 @@ async def test_router_completion_streaming():
|
|||
## check if calls equally distributed
|
||||
cache_dict = router.cache.get_cache(key=cache_key)
|
||||
for k, v in cache_dict.items():
|
||||
assert v == 1
|
||||
assert v == 1, f"Failed. K={k} called v={v} times, cache_dict={cache_dict}"
|
||||
|
|
|
@ -57,6 +57,7 @@ def test_router_num_retries_init(num_retries, max_retries):
|
|||
else:
|
||||
assert getattr(model_client, "max_retries") == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"timeout", [10, 1.0, httpx.Timeout(timeout=300.0, connect=20.0)]
|
||||
)
|
||||
|
@ -137,6 +138,7 @@ def test_router_azure_ai_studio_init(mistral_api_base):
|
|||
print(f"uri_reference: {uri_reference}")
|
||||
|
||||
assert "/v1/" in uri_reference
|
||||
assert uri_reference.count("v1") == 1
|
||||
|
||||
|
||||
def test_exception_raising():
|
||||
|
|
|
@ -203,7 +203,7 @@ def test_timeouts_router():
|
|||
},
|
||||
},
|
||||
]
|
||||
router = Router(model_list=model_list)
|
||||
router = Router(model_list=model_list, num_retries=0)
|
||||
|
||||
print("PASSED !")
|
||||
|
||||
|
@ -396,7 +396,9 @@ def test_router_init_gpt_4_vision_enhancements():
|
|||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
def test_openai_with_organization():
|
||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_openai_with_organization(sync_mode):
|
||||
try:
|
||||
print("Testing OpenAI with organization")
|
||||
model_list = [
|
||||
|
@ -418,6 +420,7 @@ def test_openai_with_organization():
|
|||
print(router.model_list)
|
||||
print(router.model_list[0])
|
||||
|
||||
if sync_mode:
|
||||
openai_client = router._get_client(
|
||||
deployment=router.model_list[0],
|
||||
kwargs={"input": ["hello"], "model": "openai-bad-org"},
|
||||
|
@ -433,7 +436,9 @@ def test_openai_with_organization():
|
|||
model="openai-bad-org",
|
||||
messages=[{"role": "user", "content": "this is a test"}],
|
||||
)
|
||||
pytest.fail("Request should have failed - This organization does not exist")
|
||||
pytest.fail(
|
||||
"Request should have failed - This organization does not exist"
|
||||
)
|
||||
except Exception as e:
|
||||
print("Got exception: " + str(e))
|
||||
assert "No such organization: org-ikDc4ex8NB" in str(e)
|
||||
|
@ -444,6 +449,36 @@ def test_openai_with_organization():
|
|||
messages=[{"role": "user", "content": "this is a test"}],
|
||||
max_tokens=5,
|
||||
)
|
||||
else:
|
||||
openai_client = router._get_client(
|
||||
deployment=router.model_list[0],
|
||||
kwargs={"input": ["hello"], "model": "openai-bad-org"},
|
||||
client_type="async",
|
||||
)
|
||||
print(vars(openai_client))
|
||||
|
||||
assert openai_client.organization == "org-ikDc4ex8NB"
|
||||
|
||||
# bad org raises error
|
||||
|
||||
try:
|
||||
response = await router.acompletion(
|
||||
model="openai-bad-org",
|
||||
messages=[{"role": "user", "content": "this is a test"}],
|
||||
)
|
||||
pytest.fail(
|
||||
"Request should have failed - This organization does not exist"
|
||||
)
|
||||
except Exception as e:
|
||||
print("Got exception: " + str(e))
|
||||
assert "No such organization: org-ikDc4ex8NB" in str(e)
|
||||
|
||||
# good org works
|
||||
response = await router.acompletion(
|
||||
model="openai-good-org",
|
||||
messages=[{"role": "user", "content": "this is a test"}],
|
||||
max_tokens=5,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
|
|
@ -57,6 +57,7 @@ def test_router_timeouts():
|
|||
redis_password=os.getenv("REDIS_PASSWORD"),
|
||||
redis_port=int(os.getenv("REDIS_PORT")),
|
||||
timeout=10,
|
||||
num_retries=0,
|
||||
)
|
||||
|
||||
print("***** TPM SETTINGS *****")
|
||||
|
@ -89,15 +90,15 @@ def test_router_timeouts():
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_router_timeouts_bedrock():
|
||||
import openai
|
||||
import openai, uuid
|
||||
|
||||
# Model list for OpenAI and Anthropic models
|
||||
model_list = [
|
||||
_model_list = [
|
||||
{
|
||||
"model_name": "bedrock",
|
||||
"litellm_params": {
|
||||
"model": "bedrock/anthropic.claude-instant-v1",
|
||||
"timeout": 0.001,
|
||||
"timeout": 0.00001,
|
||||
},
|
||||
"tpm": 80000,
|
||||
},
|
||||
|
@ -105,17 +106,18 @@ async def test_router_timeouts_bedrock():
|
|||
|
||||
# Configure router
|
||||
router = Router(
|
||||
model_list=model_list,
|
||||
model_list=_model_list,
|
||||
routing_strategy="usage-based-routing",
|
||||
debug_level="DEBUG",
|
||||
set_verbose=True,
|
||||
num_retries=0,
|
||||
)
|
||||
|
||||
litellm.set_verbose = True
|
||||
try:
|
||||
response = await router.acompletion(
|
||||
model="bedrock",
|
||||
messages=[{"role": "user", "content": "hello, who are u"}],
|
||||
messages=[{"role": "user", "content": f"hello, who are u {uuid.uuid4()}"}],
|
||||
)
|
||||
print(response)
|
||||
pytest.fail("Did not raise error `openai.APITimeoutError`")
|
||||
|
|
|
@ -5488,11 +5488,15 @@ def get_optional_params(
|
|||
if "decoding_method" in passed_params:
|
||||
extra_body["decoding_method"] = passed_params.pop("decoding_method")
|
||||
if "min_tokens" in passed_params or "min_new_tokens" in passed_params:
|
||||
extra_body["min_new_tokens"] = passed_params.pop("min_tokens", passed_params.pop("min_new_tokens"))
|
||||
extra_body["min_new_tokens"] = passed_params.pop(
|
||||
"min_tokens", passed_params.pop("min_new_tokens")
|
||||
)
|
||||
if "top_k" in passed_params:
|
||||
extra_body["top_k"] = passed_params.pop("top_k")
|
||||
if "truncate_input_tokens" in passed_params:
|
||||
extra_body["truncate_input_tokens"] = passed_params.pop("truncate_input_tokens")
|
||||
extra_body["truncate_input_tokens"] = passed_params.pop(
|
||||
"truncate_input_tokens"
|
||||
)
|
||||
if "length_penalty" in passed_params:
|
||||
extra_body["length_penalty"] = passed_params.pop("length_penalty")
|
||||
if "time_limit" in passed_params:
|
||||
|
@ -9837,19 +9841,21 @@ class CustomStreamWrapper:
|
|||
elif isinstance(chunk, (str, bytes)):
|
||||
if isinstance(chunk, bytes):
|
||||
chunk = chunk.decode("utf-8")
|
||||
if 'generated_text' in chunk:
|
||||
response = chunk.replace('data: ', '').strip()
|
||||
if "generated_text" in chunk:
|
||||
response = chunk.replace("data: ", "").strip()
|
||||
parsed_response = json.loads(response)
|
||||
else:
|
||||
return {"text": "", "is_finished": False}
|
||||
else:
|
||||
print_verbose(f"chunk: {chunk} (Type: {type(chunk)})")
|
||||
raise ValueError(f"Unable to parse response. Original response: {chunk}")
|
||||
raise ValueError(
|
||||
f"Unable to parse response. Original response: {chunk}"
|
||||
)
|
||||
results = parsed_response.get("results", [])
|
||||
if len(results) > 0:
|
||||
text = results[0].get("generated_text", "")
|
||||
finish_reason = results[0].get("stop_reason")
|
||||
is_finished = finish_reason != 'not_finished'
|
||||
is_finished = finish_reason != "not_finished"
|
||||
return {
|
||||
"text": text,
|
||||
"is_finished": is_finished,
|
||||
|
@ -10119,16 +10125,6 @@ class CustomStreamWrapper:
|
|||
elif self.custom_llm_provider == "watsonx":
|
||||
response_obj = self.handle_watsonx_stream(chunk)
|
||||
completion_obj["content"] = response_obj["text"]
|
||||
print_verbose(f"completion obj content: {completion_obj['content']}")
|
||||
if response_obj.get("prompt_tokens") is not None:
|
||||
prompt_token_count = getattr(model_response.usage, "prompt_tokens", 0)
|
||||
model_response.usage.prompt_tokens = (prompt_token_count+response_obj["prompt_tokens"])
|
||||
if response_obj.get("completion_tokens") is not None:
|
||||
model_response.usage.completion_tokens = response_obj["completion_tokens"]
|
||||
model_response.usage.total_tokens = (
|
||||
getattr(model_response.usage, "prompt_tokens", 0)
|
||||
+ getattr(model_response.usage, "completion_tokens", 0)
|
||||
)
|
||||
if response_obj["is_finished"]:
|
||||
self.received_finish_reason = response_obj["finish_reason"]
|
||||
elif self.custom_llm_provider == "text-completion-openai":
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "litellm"
|
||||
version = "1.35.30"
|
||||
version = "1.35.31"
|
||||
description = "Library to easily interface with LLM API providers"
|
||||
authors = ["BerriAI"]
|
||||
license = "MIT"
|
||||
|
@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
|
|||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.commitizen]
|
||||
version = "1.35.30"
|
||||
version = "1.35.31"
|
||||
version_files = [
|
||||
"pyproject.toml:^version"
|
||||
]
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[27125,[\"447\",\"static/chunks/447-9f8d32190ff7d16d.js\",\"931\",\"static/chunks/app/page-781ca5f151d78d1d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"PtTtxXIYvdjQsvRgdITlk\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[27125,[\"447\",\"static/chunks/447-9f8d32190ff7d16d.js\",\"931\",\"static/chunks/app/page-508c39694bd40fe9.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"kbGdRQFfI6W3bEwfzmJDI\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-781ca5f151d78d1d.js"],""]
|
||||
3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-508c39694bd40fe9.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["PtTtxXIYvdjQsvRgdITlk",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["kbGdRQFfI6W3bEwfzmJDI",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -39,6 +39,7 @@ const CreateKey: React.FC<CreateKeyProps> = ({
|
|||
const [apiKey, setApiKey] = useState(null);
|
||||
const [softBudget, setSoftBudget] = useState(null);
|
||||
const [userModels, setUserModels] = useState([]);
|
||||
const [modelsToPick, setModelsToPick] = useState([]);
|
||||
const handleOk = () => {
|
||||
setIsModalVisible(false);
|
||||
form.resetFields();
|
||||
|
@ -95,6 +96,30 @@ const CreateKey: React.FC<CreateKeyProps> = ({
|
|||
message.success('API Key copied to clipboard');
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
let tempModelsToPick = [];
|
||||
|
||||
if (team) {
|
||||
if (team.models.length > 0) {
|
||||
if (team.models.includes("all-proxy-models")) {
|
||||
// if the team has all-proxy-models show all available models
|
||||
tempModelsToPick = userModels;
|
||||
} else {
|
||||
// show team models
|
||||
tempModelsToPick = team.models;
|
||||
}
|
||||
} else {
|
||||
// show all available models if the team has no models set
|
||||
tempModelsToPick = userModels;
|
||||
}
|
||||
} else {
|
||||
// no team set, show all available models
|
||||
tempModelsToPick = userModels;
|
||||
}
|
||||
|
||||
setModelsToPick(tempModelsToPick);
|
||||
}, [team, userModels]);
|
||||
|
||||
|
||||
return (
|
||||
<div>
|
||||
|
@ -161,30 +186,15 @@ const CreateKey: React.FC<CreateKeyProps> = ({
|
|||
<Option key="all-team-models" value="all-team-models">
|
||||
All Team Models
|
||||
</Option>
|
||||
{team && team.models ? (
|
||||
team.models.includes("all-proxy-models") ? (
|
||||
userModels.map((model: string) => (
|
||||
{
|
||||
modelsToPick.map((model: string) => (
|
||||
(
|
||||
<Option key={model} value={model}>
|
||||
{model}
|
||||
</Option>
|
||||
)
|
||||
))
|
||||
) : (
|
||||
team.models.map((model: string) => (
|
||||
<Option key={model} value={model}>
|
||||
{model}
|
||||
</Option>
|
||||
))
|
||||
)
|
||||
) : (
|
||||
userModels.map((model: string) => (
|
||||
<Option key={model} value={model}>
|
||||
{model}
|
||||
</Option>
|
||||
))
|
||||
)}
|
||||
|
||||
}
|
||||
</Select>
|
||||
</Form.Item>
|
||||
<Accordion className="mt-20 mb-8" >
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue