Merge branch 'main' into litellm_common_auth_params

This commit is contained in:
Krish Dholakia 2024-04-28 08:38:06 -07:00 committed by GitHub
commit 1841b74f49
44 changed files with 531 additions and 254 deletions

.gitignore vendored
View file

@ -51,3 +51,4 @@ loadtest_kub.yaml
litellm/proxy/_new_secret_config.yaml
litellm/proxy/_super_secret_config.yaml

View file

@ -227,6 +227,7 @@ curl 'http://0.0.0.0:4000/key/generate' \
| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ |
| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ |
| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ |
| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ |
| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ |

View file

@ -167,6 +167,9 @@ messages = [
chat(messages)
```
## Redacting Messages, Response Content from Langfuse Logging
Set `litellm.turn_off_message_logging=True`. This will prevent the messages and responses from being logged to Langfuse; request metadata will still be logged.
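A minimal sketch of using this flag from the Python SDK (assumes Langfuse credentials are already set in the environment; `mock_response` avoids a real provider call):
```python
import litellm

# Opt out of logging message/response content to callbacks
litellm.turn_off_message_logging = True
litellm.success_callback = ["langfuse"]

# Usage/latency metadata is still sent to Langfuse, but the prompt and
# completion text are replaced with redacted placeholders.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋"}],
    mock_response="hello!",  # mocked so no real API call is made
)
print(response.choices[0].message.content)
```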
## Troubleshooting & Errors
### Data not getting logged to Langfuse?

View file

@ -40,5 +40,9 @@ response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content
print(response)
```
## Redacting Messages, Response Content from Sentry Logging
Set `litellm.turn_off_message_logging=True`. This will prevent the messages and responses from being logged to Sentry; request metadata will still be logged.
[Let us know](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+) if you need any additional options from Sentry.

View file

@ -20,7 +20,7 @@ See [here](https://cloud.ibm.com/apidocs/watsonx-ai#api-authentication) for more
## Usage
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/LiteLLM_IBM_Watsonx.ipynb">
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_IBM_Watsonx.ipynb">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

View file

@ -1,13 +1,13 @@
# Slack Alerting
# 🚨 Alerting
Get alerts for:
- hanging LLM api calls
- failed LLM api calls
- slow LLM api calls
- budget Tracking per key/user:
- Hanging LLM api calls
- Failed LLM api calls
- Slow LLM api calls
- Budget Tracking per key/user:
- When a User/Key crosses their Budget
- When a User/Key is 15% away from crossing their Budget
- failed db read/writes
- Failed db read/writes
## Quick Start

View file

@ -569,6 +569,22 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
All requests made with these keys will log data to their team-specific logging.
### Redacting Messages, Response Content from Langfuse Logging
Set `litellm.turn_off_message_logging=True`. This will prevent the messages and responses from being logged to Langfuse; request metadata will still be logged.
```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo

litellm_settings:
  success_callback: ["langfuse"]
  turn_off_message_logging: True
```
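One way to exercise the config above (a sketch that assumes the proxy is running locally on port 4000 with a valid virtual key):
```python
import openai

# Point the standard OpenAI client at the LiteLLM proxy (hypothetical local setup)
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

resp = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
)
# The Langfuse trace for this call should show redacted message/response content.
print(resp.choices[0].message.content)
```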
## Logging Proxy Input/Output - DataDog
We will use the `--config` to set `litellm.success_callback = ["datadog"]`. This will log all successful LLM calls to DataDog.
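For comparison, a rough sketch of the equivalent setting in the Python SDK (assumes the DataDog credentials expected by the integration, e.g. `DD_API_KEY`, are configured):
```python
import litellm

# Send successful LLM call logs to DataDog
litellm.success_callback = ["datadog"]

litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what's the weather?"}],
    mock_response="sunny",  # mocked so no real API call is made
)
```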

View file

@ -43,6 +43,12 @@ const sidebars = {
"proxy/user_keys",
"proxy/enterprise",
"proxy/virtual_keys",
"proxy/alerting",
{
type: "category",
label: "Logging",
items: ["proxy/logging", "proxy/streaming_logging"],
},
"proxy/team_based_routing",
"proxy/ui",
"proxy/cost_tracking",
@ -58,11 +64,6 @@ const sidebars = {
"proxy/pii_masking",
"proxy/prompt_injection",
"proxy/caching",
{
type: "category",
label: "Logging, Alerting",
items: ["proxy/logging", "proxy/alerting", "proxy/streaming_logging"],
},
"proxy/prometheus",
"proxy/call_hooks",
"proxy/rules",

View file

@ -45,6 +45,7 @@ _async_failure_callback: List[Callable] = (
) # internal variable - async custom callbacks are routed here.
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False
## end of callbacks #############
email: Optional[str] = (

View file

@ -12,7 +12,9 @@ import litellm
class LangFuseLogger:
# Class variables or attributes
def __init__(self, langfuse_public_key=None, langfuse_secret=None):
def __init__(
self, langfuse_public_key=None, langfuse_secret=None, flush_interval=1
):
try:
from langfuse import Langfuse
except Exception as e:
@ -31,7 +33,7 @@ class LangFuseLogger:
host=self.langfuse_host,
release=self.langfuse_release,
debug=self.langfuse_debug,
flush_interval=1, # flush interval in seconds
flush_interval=flush_interval, # flush interval in seconds
)
# set the current langfuse project id in the environ
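A short sketch of constructing the logger with the new parameter (this mirrors how `SlackAlerting` uses it below; assumes the Langfuse environment variables are set):
```python
import os
from litellm.integrations.langfuse import LangFuseLogger

# flush_interval=1 flushes buffered events to Langfuse every second, which is
# useful when traces need to show up quickly (e.g. for alerting links).
langfuse_logger = LangFuseLogger(
    langfuse_public_key=os.getenv("LANGFUSE_PUBLIC_KEY"),
    langfuse_secret=os.getenv("LANGFUSE_SECRET_KEY"),
    flush_interval=1,
)
```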

View file

@ -12,6 +12,7 @@ from litellm.caching import DualCache
import asyncio
import aiohttp
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
import datetime
class SlackAlerting:
@ -47,6 +48,18 @@ class SlackAlerting:
self.internal_usage_cache = DualCache()
self.async_http_handler = AsyncHTTPHandler()
self.alert_to_webhook_url = alert_to_webhook_url
self.langfuse_logger = None
try:
from litellm.integrations.langfuse import LangFuseLogger
self.langfuse_logger = LangFuseLogger(
os.getenv("LANGFUSE_PUBLIC_KEY"),
os.getenv("LANGFUSE_SECRET_KEY"),
flush_interval=1,
)
except:
pass
pass
@ -93,39 +106,68 @@ class SlackAlerting:
request_info: str,
request_data: Optional[dict] = None,
kwargs: Optional[dict] = None,
type: Literal["hanging_request", "slow_response"] = "hanging_request",
start_time: Optional[datetime.datetime] = None,
end_time: Optional[datetime.datetime] = None,
):
import uuid
# For now: do nothing as we're debugging why this is not working as expected
if request_data is not None:
trace_id = request_data.get("metadata", {}).get(
"trace_id", None
) # get langfuse trace id
if trace_id is None:
trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
request_data["metadata"]["trace_id"] = trace_id
elif kwargs is not None:
_litellm_params = kwargs.get("litellm_params", {})
trace_id = _litellm_params.get("metadata", {}).get(
"trace_id", None
) # get langfuse trace id
if trace_id is None:
trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
_litellm_params["metadata"]["trace_id"] = trace_id
# Log hanging request as an error on langfuse
if type == "hanging_request":
if self.langfuse_logger is not None:
_logging_kwargs = copy.deepcopy(request_data)
if _logging_kwargs is None:
_logging_kwargs = {}
_logging_kwargs["litellm_params"] = {}
request_data = request_data or {}
_logging_kwargs["litellm_params"]["metadata"] = request_data.get(
"metadata", {}
)
# log to langfuse in a separate thread
import threading
threading.Thread(
target=self.langfuse_logger.log_event,
args=(
_logging_kwargs,
None,
start_time,
end_time,
None,
print,
"ERROR",
"Requests is hanging",
),
).start()
_langfuse_host = os.environ.get("LANGFUSE_HOST", "https://cloud.langfuse.com")
_langfuse_project_id = os.environ.get("LANGFUSE_PROJECT_ID")
# langfuse urls look like: https://us.cloud.langfuse.com/project/************/traces/litellm-alert-trace-ididi9dk-09292-************
_langfuse_url = (
f"{_langfuse_host}/project/{_langfuse_project_id}/traces/{trace_id}"
)
request_info += f"\n🪢 Langfuse Trace: {_langfuse_url}"
return request_info
# if request_data is not None:
# trace_id = request_data.get("metadata", {}).get(
# "trace_id", None
# ) # get langfuse trace id
# if trace_id is None:
# trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
# request_data["metadata"]["trace_id"] = trace_id
# elif kwargs is not None:
# _litellm_params = kwargs.get("litellm_params", {})
# trace_id = _litellm_params.get("metadata", {}).get(
# "trace_id", None
# ) # get langfuse trace id
# if trace_id is None:
# trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
# _litellm_params["metadata"]["trace_id"] = trace_id
# _langfuse_host = os.environ.get("LANGFUSE_HOST", "https://cloud.langfuse.com")
# _langfuse_project_id = os.environ.get("LANGFUSE_PROJECT_ID")
# # langfuse urls look like: https://us.cloud.langfuse.com/project/************/traces/litellm-alert-trace-ididi9dk-09292-************
# _langfuse_url = (
# f"{_langfuse_host}/project/{_langfuse_project_id}/traces/{trace_id}"
# )
# request_info += f"\n🪢 Langfuse Trace: {_langfuse_url}"
# return request_info
def _response_taking_too_long_callback(
self,
kwargs, # kwargs to completion
@ -167,6 +209,14 @@ class SlackAlerting:
_deployment_latencies = metadata["_latency_per_deployment"]
if len(_deployment_latencies) == 0:
return None
try:
# try sorting deployments by latency
_deployment_latencies = sorted(
_deployment_latencies.items(), key=lambda x: x[1]
)
_deployment_latencies = dict(_deployment_latencies)
except:
pass
for api_base, latency in _deployment_latencies.items():
_message_to_send += f"\n{api_base}: {round(latency,2)}s"
_message_to_send = "```" + _message_to_send + "```"
@ -194,7 +244,7 @@ class SlackAlerting:
if time_difference_float > self.alerting_threshold:
if "langfuse" in litellm.success_callback:
request_info = self._add_langfuse_trace_id_to_alert(
request_info=request_info, kwargs=kwargs
request_info=request_info, kwargs=kwargs, type="slow_response"
)
# add deployment latencies to alert
if (
@ -222,8 +272,8 @@ class SlackAlerting:
async def response_taking_too_long(
self,
start_time: Optional[float] = None,
end_time: Optional[float] = None,
start_time: Optional[datetime.datetime] = None,
end_time: Optional[datetime.datetime] = None,
type: Literal["hanging_request", "slow_response"] = "hanging_request",
request_data: Optional[dict] = None,
):
@ -243,10 +293,6 @@ class SlackAlerting:
except:
messages = ""
request_info = f"\nRequest Model: `{model}`\nMessages: `{messages}`"
if "langfuse" in litellm.success_callback:
request_info = self._add_langfuse_trace_id_to_alert(
request_info=request_info, request_data=request_data
)
else:
request_info = ""
@ -288,6 +334,15 @@ class SlackAlerting:
f"`Requests are hanging - {self.alerting_threshold}s+ request time`"
)
if "langfuse" in litellm.success_callback:
request_info = self._add_langfuse_trace_id_to_alert(
request_info=request_info,
request_data=request_data,
type="hanging_request",
start_time=start_time,
end_time=end_time,
)
# add deployment latencies to alert
_deployment_latency_map = self._get_deployment_latencies_to_alert(
metadata=request_data.get("metadata", {})
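For reference, a small helper showing how the Langfuse trace link in the alert is assembled (a sketch based on the URL format used in the code above):
```python
import os
import uuid


def build_langfuse_trace_url(trace_id: str) -> str:
    # Format used above: https://<langfuse-host>/project/<project-id>/traces/<trace-id>
    host = os.environ.get("LANGFUSE_HOST", "https://cloud.langfuse.com")
    project_id = os.environ.get("LANGFUSE_PROJECT_ID")
    return f"{host}/project/{project_id}/traces/{trace_id}"


# Alert trace ids are generated the same way as in the code above
print(build_langfuse_trace_url("litellm-alert-trace-" + str(uuid.uuid4())))
```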

View file

@ -447,6 +447,7 @@ class OpenAIChatCompletion(BaseLLM):
)
else:
openai_aclient = client
## LOGGING
logging_obj.pre_call(
input=data["messages"],

View file

@ -1039,6 +1039,7 @@ async def async_streaming(
instances=None,
vertex_project=None,
vertex_location=None,
safety_settings=None,
**optional_params,
):
"""
@ -1065,6 +1066,7 @@ async def async_streaming(
response = await llm_model._generate_content_streaming_async(
contents=content,
generation_config=optional_params,
safety_settings=safety_settings,
tools=tools,
)
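A rough sketch of passing `safety_settings` through a streaming call (the model name and category/threshold values here are illustrative assumptions; consult the Vertex AI docs for the full enums):
```python
import litellm

# Hypothetical example values for Vertex AI safety settings
safety_settings = [
    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH"},
]

response = litellm.completion(
    model="vertex_ai/gemini-pro",
    messages=[{"role": "user", "content": "hello"}],
    stream=True,
    safety_settings=safety_settings,  # forwarded to the Vertex AI SDK
)
for chunk in response:
    print(chunk)
```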

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[27125,[\"447\",\"static/chunks/447-9f8d32190ff7d16d.js\",\"931\",\"static/chunks/app/page-781ca5f151d78d1d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"PtTtxXIYvdjQsvRgdITlk\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[27125,[\"447\",\"static/chunks/447-9f8d32190ff7d16d.js\",\"931\",\"static/chunks/app/page-508c39694bd40fe9.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"kbGdRQFfI6W3bEwfzmJDI\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-781ca5f151d78d1d.js"],""]
3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-508c39694bd40fe9.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["PtTtxXIYvdjQsvRgdITlk",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["kbGdRQFfI6W3bEwfzmJDI",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -1,23 +1,23 @@
model_list:
- model_name: text-embedding-3-small
litellm_params:
model: text-embedding-3-small
- model_name: whisper
litellm_params:
model: azure/azure-whisper
api_version: 2024-02-15-preview
api_base: os.environ/AZURE_EUROPE_API_BASE
api_key: os.environ/AZURE_EUROPE_API_KEY
model_info:
mode: audio_transcription
- litellm_params:
model: gpt-4
model_name: gpt-4
- model_name: azure-mistral
litellm_params:
model: azure/mistral-large-latest
api_base: https://Mistral-large-nmefg-serverless.eastus2.inference.ai.azure.com
api_key: os.environ/AZURE_MISTRAL_API_KEY
# litellm_settings:
# cache: True
api_base: http://0.0.0.0:8080
api_key: my-fake-key
model: openai/my-fake-model
model_name: fake-openai-endpoint
- litellm_params:
api_base: http://0.0.0.0:8080
api_key: my-fake-key
model: openai/my-fake-model-2
model_name: fake-openai-endpoint
- litellm_params:
api_base: http://0.0.0.0:8080
api_key: my-fake-key
model: openai/my-fake-model-3
model_name: fake-openai-endpoint
- litellm_params:
api_base: http://0.0.0.0:8080
api_key: my-fake-key
model: openai/my-fake-model-4
model_name: fake-openai-endpoint
router_settings:
num_retries: 0

View file

@ -95,7 +95,15 @@ def common_checks(
f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}"
)
# 7. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
if litellm.max_budget > 0 and global_proxy_spend is not None:
if (
litellm.max_budget > 0
and global_proxy_spend is not None
# only run global budget checks for OpenAI routes
# Reason - the Admin UI should continue working if the proxy crosses its global budget
and route in LiteLLMRoutes.openai_routes.value
and route != "/v1/models"
and route != "/models"
):
if global_proxy_spend > litellm.max_budget:
raise Exception(
f"ExceededBudget: LiteLLM Proxy has exceeded its budget. Current spend: {global_proxy_spend}; Max Budget: {litellm.max_budget}"

View file

@ -50,7 +50,6 @@ class Router:
model_names: List = []
cache_responses: Optional[bool] = False
default_cache_time_seconds: int = 1 * 60 * 60 # 1 hour
num_retries: int = 0
tenacity = None
leastbusy_logger: Optional[LeastBusyLoggingHandler] = None
lowesttpm_logger: Optional[LowestTPMLoggingHandler] = None
@ -70,9 +69,11 @@ class Router:
] = None, # if you want to cache across model groups
client_ttl: int = 3600, # ttl for cached clients - will re-initialize after this time in seconds
## RELIABILITY ##
num_retries: int = 0,
num_retries: Optional[int] = None,
timeout: Optional[float] = None,
default_litellm_params={}, # default params for Router.chat.completion.create
default_litellm_params: Optional[
dict
] = None, # default params for Router.chat.completion.create
default_max_parallel_requests: Optional[int] = None,
set_verbose: bool = False,
debug_level: Literal["DEBUG", "INFO"] = "INFO",
@ -158,6 +159,7 @@ class Router:
router = Router(model_list=model_list, fallbacks=[{"azure-gpt-3.5-turbo": "openai-gpt-3.5-turbo"}])
```
"""
if semaphore:
self.semaphore = semaphore
self.set_verbose = set_verbose
@ -229,7 +231,14 @@ class Router:
self.failed_calls = (
InMemoryCache()
) # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown
self.num_retries = num_retries or litellm.num_retries or 0
if num_retries is not None:
self.num_retries = num_retries
elif litellm.num_retries is not None:
self.num_retries = litellm.num_retries
else:
self.num_retries = openai.DEFAULT_MAX_RETRIES
self.timeout = timeout or litellm.request_timeout
self.retry_after = retry_after
@ -255,6 +264,7 @@ class Router:
) # dict to store aliases for router, ex. {"gpt-4": "gpt-3.5-turbo"}, all requests with gpt-4 -> get routed to gpt-3.5-turbo group
# make Router.chat.completions.create compatible for openai.chat.completions.create
default_litellm_params = default_litellm_params or {}
self.chat = litellm.Chat(params=default_litellm_params, router_obj=self)
# default litellm args
@ -428,6 +438,7 @@ class Router:
kwargs["messages"] = messages
kwargs["original_function"] = self._acompletion
kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
timeout = kwargs.get("request_timeout", self.timeout)
kwargs.setdefault("metadata", {}).update({"model_group": model})
@ -469,6 +480,7 @@ class Router:
)
kwargs["model_info"] = deployment.get("model_info", {})
data = deployment["litellm_params"].copy()
model_name = data["model"]
for k, v in self.default_litellm_params.items():
if (
@ -1415,10 +1427,12 @@ class Router:
context_window_fallbacks = kwargs.pop(
"context_window_fallbacks", self.context_window_fallbacks
)
verbose_router_logger.debug(
f"async function w/ retries: original_function - {original_function}"
)
num_retries = kwargs.pop("num_retries")
verbose_router_logger.debug(
f"async function w/ retries: original_function - {original_function}, num_retries - {num_retries}"
)
try:
# if the function call is successful, no exception will be raised and we'll break out of the loop
response = await original_function(*args, **kwargs)
@ -1445,6 +1459,7 @@ class Router:
await asyncio.sleep(timeout)
elif RouterErrors.user_defined_ratelimit_error.value in str(e):
raise e # don't wait to retry if deployment hits user-defined rate-limit
elif hasattr(original_exception, "status_code") and litellm._should_retry(
status_code=original_exception.status_code
):
@ -1606,6 +1621,28 @@ class Router:
raise e
raise original_exception
def _router_should_retry(
self, e: Exception, remaining_retries: int, num_retries: int
):
"""
Calculate back-off, then retry
"""
if hasattr(e, "response") and hasattr(e.response, "headers"):
timeout = litellm._calculate_retry_after(
remaining_retries=remaining_retries,
max_retries=num_retries,
response_headers=e.response.headers,
min_timeout=self.retry_after,
)
time.sleep(timeout)
else:
timeout = litellm._calculate_retry_after(
remaining_retries=remaining_retries,
max_retries=num_retries,
min_timeout=self.retry_after,
)
time.sleep(timeout)
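A sketch of what the helper above produces per attempt when no `Retry-After` headers are available (assumes, as the `time.sleep(timeout)` calls imply, that `litellm._calculate_retry_after` returns the number of seconds to wait):
```python
import litellm

num_retries = 3
min_timeout = 0  # e.g. the Router's retry_after setting

for remaining in range(num_retries, 0, -1):
    wait = litellm._calculate_retry_after(
        remaining_retries=remaining,
        max_retries=num_retries,
        min_timeout=min_timeout,
    )
    print(f"remaining={remaining} -> sleep {wait:.2f}s")
```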
def function_with_retries(self, *args, **kwargs):
"""
Try calling the model 3 times. Shuffle between available deployments.
@ -1625,9 +1662,6 @@ class Router:
return response
except Exception as e:
original_exception = e
verbose_router_logger.debug(
f"num retries in function with retries: {num_retries}"
)
### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR
if (
isinstance(original_exception, litellm.ContextWindowExceededError)
@ -1641,6 +1675,11 @@ class Router:
if num_retries > 0:
kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
### RETRY
self._router_should_retry(
e=original_exception,
remaining_retries=num_retries,
num_retries=num_retries,
)
for current_attempt in range(num_retries):
verbose_router_logger.debug(
f"retrying request. Current attempt - {current_attempt}; retries left: {num_retries}"
@ -1654,34 +1693,11 @@ class Router:
## LOGGING
kwargs = self.log_retry(kwargs=kwargs, e=e)
remaining_retries = num_retries - current_attempt
if "No models available" in str(e):
timeout = litellm._calculate_retry_after(
self._router_should_retry(
e=e,
remaining_retries=remaining_retries,
max_retries=num_retries,
min_timeout=self.retry_after,
num_retries=num_retries,
)
time.sleep(timeout)
elif (
hasattr(e, "status_code")
and hasattr(e, "response")
and litellm._should_retry(status_code=e.status_code)
):
if hasattr(e.response, "headers"):
timeout = litellm._calculate_retry_after(
remaining_retries=remaining_retries,
max_retries=num_retries,
response_headers=e.response.headers,
min_timeout=self.retry_after,
)
else:
timeout = litellm._calculate_retry_after(
remaining_retries=remaining_retries,
max_retries=num_retries,
min_timeout=self.retry_after,
)
time.sleep(timeout)
else:
raise e
raise original_exception
### HELPER FUNCTIONS
@ -1979,6 +1995,8 @@ class Router:
# check if it ends with a trailing slash
if api_base.endswith("/"):
api_base += "v1/"
elif api_base.endswith("/v1"):
api_base += "/"
else:
api_base += "/v1/"
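The normalization above can be summarized with a tiny standalone helper (a sketch, not the Router's actual method):
```python
def ensure_v1_suffix(api_base: str) -> str:
    # Mirror the logic above: always produce a ".../v1/"-style base URL
    if api_base.endswith("/"):
        return api_base + "v1/"
    elif api_base.endswith("/v1"):
        return api_base + "/"
    return api_base + "/v1/"


assert ensure_v1_suffix("https://host.example.com") == "https://host.example.com/v1/"
assert ensure_v1_suffix("https://host.example.com/") == "https://host.example.com/v1/"
assert ensure_v1_suffix("https://host.example.com/v1") == "https://host.example.com/v1/"
```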
@ -2004,7 +2022,9 @@ class Router:
stream_timeout = litellm.get_secret(stream_timeout_env_name)
litellm_params["stream_timeout"] = stream_timeout
max_retries = litellm_params.pop("max_retries", 2)
max_retries = litellm_params.pop(
"max_retries", 0
) # router handles retry logic
if isinstance(max_retries, str) and max_retries.startswith("os.environ/"):
max_retries_env_name = max_retries.replace("os.environ/", "")
max_retries = litellm.get_secret(max_retries_env_name)

View file

@ -119,7 +119,9 @@ def test_multiple_deployments_parallel():
# test_multiple_deployments_parallel()
def test_cooldown_same_model_name():
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_cooldown_same_model_name(sync_mode):
# users could have the same model with different api_base
# example
# azure/chatgpt, api_base: 1234
@ -161,6 +163,7 @@ def test_cooldown_same_model_name():
num_retries=3,
) # type: ignore
if sync_mode:
response = router.completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "hello this request will pass"}],
@ -176,6 +179,23 @@ def test_cooldown_same_model_name():
model_ids[0] != model_ids[1]
) # ensure both models have a uuid added, and they have different names
print("\ngot response\n", response)
else:
response = await router.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "hello this request will pass"}],
)
print(router.model_list)
model_ids = []
for model in router.model_list:
model_ids.append(model["model_info"]["id"])
print("\n litellm model ids ", model_ids)
# example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
assert (
model_ids[0] != model_ids[1]
) # ensure both models have a uuid added, and they have different names
print("\ngot response\n", response)
except Exception as e:
pytest.fail(f"Got unexpected exception on router! - {e}")

View file

@ -161,40 +161,54 @@ async def make_async_calls():
return total_time
# def test_langfuse_logging_async_text_completion():
# try:
# pre_langfuse_setup()
# litellm.set_verbose = False
# litellm.success_callback = ["langfuse"]
@pytest.mark.asyncio
@pytest.mark.parametrize("stream", [False, True])
async def test_langfuse_logging_without_request_response(stream):
try:
import uuid
# async def _test_langfuse():
# response = await litellm.atext_completion(
# model="gpt-3.5-turbo-instruct",
# prompt="this is a test",
# max_tokens=5,
# temperature=0.7,
# timeout=5,
# user="test_user",
# stream=True
# )
# async for chunk in response:
# print()
# print(chunk)
# await asyncio.sleep(1)
# return response
_unique_trace_name = f"litellm-test-{str(uuid.uuid4())}"
litellm.set_verbose = True
litellm.turn_off_message_logging = True
litellm.success_callback = ["langfuse"]
response = await litellm.acompletion(
model="gpt-3.5-turbo",
mock_response="It's simple to use and easy to get started",
messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}],
max_tokens=10,
temperature=0.2,
stream=stream,
metadata={"trace_id": _unique_trace_name},
)
print(response)
if stream:
async for chunk in response:
print(chunk)
# response = asyncio.run(_test_langfuse())
# print(f"response: {response}")
await asyncio.sleep(3)
# # # check langfuse.log to see if there was a failed response
# search_logs("langfuse.log")
# except litellm.Timeout as e:
# pass
# except Exception as e:
# pytest.fail(f"An exception occurred - {e}")
import langfuse
langfuse_client = langfuse.Langfuse(
public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
secret_key=os.environ["LANGFUSE_SECRET_KEY"],
)
# test_langfuse_logging_async_text_completion()
# get trace with _unique_trace_name
trace = langfuse_client.get_generations(trace_id=_unique_trace_name)
print("trace_from_langfuse", trace)
_trace_data = trace.data
assert _trace_data[0].input == {"messages": "redacted-by-litellm"}
assert _trace_data[0].output == {
"role": "assistant",
"content": "redacted-by-litellm",
}
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
@pytest.mark.skip(reason="beta test - checking langfuse output")

View file

@ -2730,7 +2730,6 @@ async def test_acompletion_watsonx():
messages=messages,
temperature=0.2,
max_tokens=80,
space_id=os.getenv("WATSONX_SPACE_ID_TEST"),
)
# Add any assertions here to check the response
print(response)

View file

@ -483,6 +483,8 @@ def test_mistral_embeddings():
except Exception as e:
pytest.fail(f"Error occurred: {e}")
@pytest.mark.skip(reason="local test")
def test_watsonx_embeddings():
try:
litellm.set_verbose = True

View file

@ -201,6 +201,7 @@ async def test_router_atext_completion_streaming():
@pytest.mark.asyncio
async def test_router_completion_streaming():
litellm.set_verbose = True
messages = [
{"role": "user", "content": "Hello, can you generate a 500 words poem?"}
]
@ -219,9 +220,9 @@ async def test_router_completion_streaming():
{
"model_name": "azure-model",
"litellm_params": {
"model": "azure/gpt-35-turbo",
"api_key": "os.environ/AZURE_EUROPE_API_KEY",
"api_base": "https://my-endpoint-europe-berri-992.openai.azure.com",
"model": "azure/gpt-turbo",
"api_key": "os.environ/AZURE_FRANCE_API_KEY",
"api_base": "https://openai-france-1234.openai.azure.com",
"rpm": 6,
},
"model_info": {"id": 2},
@ -229,9 +230,9 @@ async def test_router_completion_streaming():
{
"model_name": "azure-model",
"litellm_params": {
"model": "azure/gpt-35-turbo",
"api_key": "os.environ/AZURE_CANADA_API_KEY",
"api_base": "https://my-endpoint-canada-berri992.openai.azure.com",
"model": "azure/gpt-turbo",
"api_key": "os.environ/AZURE_FRANCE_API_KEY",
"api_base": "https://openai-france-1234.openai.azure.com",
"rpm": 6,
},
"model_info": {"id": 3},
@ -262,4 +263,4 @@ async def test_router_completion_streaming():
## check if calls equally distributed
cache_dict = router.cache.get_cache(key=cache_key)
for k, v in cache_dict.items():
assert v == 1
assert v == 1, f"Failed. K={k} called v={v} times, cache_dict={cache_dict}"

View file

@ -1,7 +1,7 @@
#### What this tests ####
# This tests litellm router
import sys, os, time
import sys, os, time, openai
import traceback, asyncio
import pytest
@ -19,6 +19,45 @@ import os, httpx
load_dotenv()
@pytest.mark.parametrize("num_retries", [None, 2])
@pytest.mark.parametrize("max_retries", [None, 4])
def test_router_num_retries_init(num_retries, max_retries):
"""
- test when num_retries set v/s not
- test client value when max retries set v/s not
"""
router = Router(
model_list=[
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
"max_retries": max_retries,
},
"model_info": {"id": 12345},
},
],
num_retries=num_retries,
)
if num_retries is not None:
assert router.num_retries == num_retries
else:
assert router.num_retries == openai.DEFAULT_MAX_RETRIES
model_client = router._get_client(
{"model_info": {"id": 12345}}, client_type="async", kwargs={}
)
if max_retries is not None:
assert getattr(model_client, "max_retries") == max_retries
else:
assert getattr(model_client, "max_retries") == 0
@pytest.mark.parametrize(
"timeout", [10, 1.0, httpx.Timeout(timeout=300.0, connect=20.0)]
)
@ -99,6 +138,7 @@ def test_router_azure_ai_studio_init(mistral_api_base):
print(f"uri_reference: {uri_reference}")
assert "/v1/" in uri_reference
assert uri_reference.count("v1") == 1
def test_exception_raising():

View file

@ -258,6 +258,7 @@ def test_sync_fallbacks_embeddings():
model_list=model_list,
fallbacks=[{"bad-azure-embedding-model": ["good-azure-embedding-model"]}],
set_verbose=False,
num_retries=0,
)
customHandler = MyCustomHandler()
litellm.callbacks = [customHandler]
@ -393,7 +394,7 @@ def test_dynamic_fallbacks_sync():
},
]
router = Router(model_list=model_list, set_verbose=True)
router = Router(model_list=model_list, set_verbose=True, num_retries=0)
kwargs = {}
kwargs["model"] = "azure/gpt-3.5-turbo"
kwargs["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
@ -830,6 +831,7 @@ def test_usage_based_routing_fallbacks():
routing_strategy="usage-based-routing",
redis_host=os.environ["REDIS_HOST"],
redis_port=os.environ["REDIS_PORT"],
num_retries=0,
)
messages = [

View file

@ -203,7 +203,7 @@ def test_timeouts_router():
},
},
]
router = Router(model_list=model_list)
router = Router(model_list=model_list, num_retries=0)
print("PASSED !")
@ -396,7 +396,9 @@ def test_router_init_gpt_4_vision_enhancements():
pytest.fail(f"Error occurred: {e}")
def test_openai_with_organization():
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_openai_with_organization(sync_mode):
try:
print("Testing OpenAI with organization")
model_list = [
@ -418,6 +420,7 @@ def test_openai_with_organization():
print(router.model_list)
print(router.model_list[0])
if sync_mode:
openai_client = router._get_client(
deployment=router.model_list[0],
kwargs={"input": ["hello"], "model": "openai-bad-org"},
@ -433,7 +436,9 @@ def test_openai_with_organization():
model="openai-bad-org",
messages=[{"role": "user", "content": "this is a test"}],
)
pytest.fail("Request should have failed - This organization does not exist")
pytest.fail(
"Request should have failed - This organization does not exist"
)
except Exception as e:
print("Got exception: " + str(e))
assert "No such organization: org-ikDc4ex8NB" in str(e)
@ -444,6 +449,36 @@ def test_openai_with_organization():
messages=[{"role": "user", "content": "this is a test"}],
max_tokens=5,
)
else:
openai_client = router._get_client(
deployment=router.model_list[0],
kwargs={"input": ["hello"], "model": "openai-bad-org"},
client_type="async",
)
print(vars(openai_client))
assert openai_client.organization == "org-ikDc4ex8NB"
# bad org raises error
try:
response = await router.acompletion(
model="openai-bad-org",
messages=[{"role": "user", "content": "this is a test"}],
)
pytest.fail(
"Request should have failed - This organization does not exist"
)
except Exception as e:
print("Got exception: " + str(e))
assert "No such organization: org-ikDc4ex8NB" in str(e)
# good org works
response = await router.acompletion(
model="openai-good-org",
messages=[{"role": "user", "content": "this is a test"}],
max_tokens=5,
)
except Exception as e:
pytest.fail(f"Error occurred: {e}")

View file

@ -57,6 +57,7 @@ def test_router_timeouts():
redis_password=os.getenv("REDIS_PASSWORD"),
redis_port=int(os.getenv("REDIS_PORT")),
timeout=10,
num_retries=0,
)
print("***** TPM SETTINGS *****")
@ -89,15 +90,15 @@ def test_router_timeouts():
@pytest.mark.asyncio
async def test_router_timeouts_bedrock():
import openai
import openai, uuid
# Model list for OpenAI and Anthropic models
model_list = [
_model_list = [
{
"model_name": "bedrock",
"litellm_params": {
"model": "bedrock/anthropic.claude-instant-v1",
"timeout": 0.001,
"timeout": 0.00001,
},
"tpm": 80000,
},
@ -105,17 +106,18 @@ async def test_router_timeouts_bedrock():
# Configure router
router = Router(
model_list=model_list,
model_list=_model_list,
routing_strategy="usage-based-routing",
debug_level="DEBUG",
set_verbose=True,
num_retries=0,
)
litellm.set_verbose = True
try:
response = await router.acompletion(
model="bedrock",
messages=[{"role": "user", "content": "hello, who are u"}],
messages=[{"role": "user", "content": f"hello, who are u {uuid.uuid4()}"}],
)
print(response)
pytest.fail("Did not raise error `openai.APITimeoutError`")

View file

@ -78,7 +78,8 @@ def test_hanging_request_azure():
"model_name": "openai-gpt",
"litellm_params": {"model": "gpt-3.5-turbo"},
},
]
],
num_retries=0,
)
encoded = litellm.utils.encode(model="gpt-3.5-turbo", text="blue")[0]
@ -131,7 +132,8 @@ def test_hanging_request_openai():
"model_name": "openai-gpt",
"litellm_params": {"model": "gpt-3.5-turbo"},
},
]
],
num_retries=0,
)
encoded = litellm.utils.encode(model="gpt-3.5-turbo", text="blue")[0]
@ -189,6 +191,7 @@ def test_timeout_streaming():
# test_timeout_streaming()
@pytest.mark.skip(reason="local test")
def test_timeout_ollama():
# this Will Raise a timeout
import litellm

View file

@ -110,7 +110,7 @@ class LiteLLM_Params(BaseModel):
stream_timeout: Optional[Union[float, str]] = (
None # timeout when making stream=True calls, if str, pass in as os.environ/
)
max_retries: int = 2 # follows openai default of 2
max_retries: Optional[int] = None
organization: Optional[str] = None # for openai orgs
## VERTEX AI ##
vertex_project: Optional[str] = None
@ -148,9 +148,7 @@ class LiteLLM_Params(BaseModel):
args.pop("self", None)
args.pop("params", None)
args.pop("__class__", None)
if max_retries is None:
max_retries = 2
elif isinstance(max_retries, str):
if max_retries is not None and isinstance(max_retries, str):
max_retries = int(max_retries) # cast to int
super().__init__(max_retries=max_retries, **args, **params)

View file

@ -1212,7 +1212,6 @@ class Logging:
print_verbose(
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
)
# Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
callbacks = litellm.input_callback + self.dynamic_input_callbacks
for callback in callbacks:
@ -1229,29 +1228,17 @@ class Logging:
litellm_call_id=self.litellm_params["litellm_call_id"],
print_verbose=print_verbose,
)
elif callback == "lite_debugger":
print_verbose(
f"reaches litedebugger for logging! - model_call_details {self.model_call_details}"
)
model = self.model_call_details["model"]
messages = self.model_call_details["input"]
print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
liteDebuggerClient.input_log_event(
model=model,
messages=messages,
end_user=self.model_call_details.get("user", "default"),
litellm_call_id=self.litellm_params["litellm_call_id"],
litellm_params=self.model_call_details["litellm_params"],
optional_params=self.model_call_details["optional_params"],
print_verbose=print_verbose,
call_type=self.call_type,
)
elif callback == "sentry" and add_breadcrumb:
print_verbose("reaches sentry breadcrumbing")
details_to_log = copy.deepcopy(self.model_call_details)
if litellm.turn_off_message_logging:
# make a copy of the _model_Call_details and log it
details_to_log.pop("messages", None)
details_to_log.pop("input", None)
details_to_log.pop("prompt", None)
add_breadcrumb(
category="litellm.llm_call",
message=f"Model Call Details pre-call: {self.model_call_details}",
message=f"Model Call Details pre-call: {details_to_log}",
level="info",
)
elif isinstance(callback, CustomLogger): # custom logger class
@ -1315,7 +1302,7 @@ class Logging:
print_verbose(
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
)
self.redact_message_input_output_from_logging(result=original_response)
# Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
callbacks = litellm.input_callback + self.dynamic_input_callbacks
@ -1333,9 +1320,17 @@ class Logging:
)
elif callback == "sentry" and add_breadcrumb:
print_verbose("reaches sentry breadcrumbing")
details_to_log = copy.deepcopy(self.model_call_details)
if litellm.turn_off_message_logging:
# make a copy of the _model_Call_details and log it
details_to_log.pop("messages", None)
details_to_log.pop("input", None)
details_to_log.pop("prompt", None)
add_breadcrumb(
category="litellm.llm_call",
message=f"Model Call Details post-call: {self.model_call_details}",
message=f"Model Call Details post-call: {details_to_log}",
level="info",
)
elif isinstance(callback, CustomLogger): # custom logger class
@ -1527,6 +1522,8 @@ class Logging:
else:
callbacks = litellm.success_callback
self.redact_message_input_output_from_logging(result=result)
for callback in callbacks:
try:
litellm_params = self.model_call_details.get("litellm_params", {})
@ -2071,6 +2068,9 @@ class Logging:
callbacks.append(callback)
else:
callbacks = litellm._async_success_callback
self.redact_message_input_output_from_logging(result=result)
print_verbose(f"Async success callbacks: {callbacks}")
for callback in callbacks:
# check if callback can run for this request
@ -2232,7 +2232,10 @@ class Logging:
start_time=start_time,
end_time=end_time,
)
result = None # result sent to all loggers, init this to None incase it's not created
self.redact_message_input_output_from_logging(result=result)
for callback in litellm.failure_callback:
try:
if callback == "lite_debugger":
@ -2417,6 +2420,33 @@ class Logging:
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
)
def redact_message_input_output_from_logging(self, result):
"""
Removes messages, prompts, input, response from logging. This modifies the data in-place;
it only redacts when litellm.turn_off_message_logging == True.
"""
# check if user opted out of logging message/response to callbacks
if litellm.turn_off_message_logging == True:
# remove messages, prompts, input, response from logging
self.model_call_details["messages"] = "redacted-by-litellm"
self.model_call_details["prompt"] = ""
self.model_call_details["input"] = ""
# response cleaning
# ChatCompletion Responses
if self.stream and "complete_streaming_response" in self.model_call_details:
_streaming_response = self.model_call_details[
"complete_streaming_response"
]
for choice in _streaming_response.choices:
choice.message.content = "redacted-by-litellm"
else:
if result is not None:
if isinstance(result, litellm.ModelResponse):
if hasattr(result, "choices"):
for choice in result.choices:
choice.message.content = "redacted-by-litellm"
def exception_logging(
additional_args={},
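A sketch of observing the redaction from a custom callback (assumes the standard `CustomLogger` interface used elsewhere in the test suite; `mock_response` avoids a real API call):
```python
import litellm
from litellm.integrations.custom_logger import CustomLogger


class PrintLogger(CustomLogger):
    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        # With turn_off_message_logging=True, content should arrive redacted
        print("messages seen by callback:", kwargs.get("messages"))
        print("response content:", response_obj.choices[0].message.content)


litellm.turn_off_message_logging = True
litellm.callbacks = [PrintLogger()]

litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
    mock_response="hi there",
)
```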
@ -2598,9 +2628,15 @@ def function_setup(
dynamic_success_callbacks = kwargs.pop("success_callback")
if add_breadcrumb:
details_to_log = copy.deepcopy(kwargs)
if litellm.turn_off_message_logging:
# make a copy of the _model_Call_details and log it
details_to_log.pop("messages", None)
details_to_log.pop("input", None)
details_to_log.pop("prompt", None)
add_breadcrumb(
category="litellm.llm_call",
message=f"Positional Args: {args}, Keyword Args: {kwargs}",
message=f"Positional Args: {args}, Keyword Args: {details_to_log}",
level="info",
)
if "logger_fn" in kwargs:

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.35.30"
version = "1.35.31"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.35.30"
version = "1.35.31"
version_files = [
"pyproject.toml:^version"
]

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[27125,[\"447\",\"static/chunks/447-9f8d32190ff7d16d.js\",\"931\",\"static/chunks/app/page-781ca5f151d78d1d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"PtTtxXIYvdjQsvRgdITlk\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[27125,[\"447\",\"static/chunks/447-9f8d32190ff7d16d.js\",\"931\",\"static/chunks/app/page-508c39694bd40fe9.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"kbGdRQFfI6W3bEwfzmJDI\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-781ca5f151d78d1d.js"],""]
3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-508c39694bd40fe9.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["PtTtxXIYvdjQsvRgdITlk",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["kbGdRQFfI6W3bEwfzmJDI",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -39,6 +39,7 @@ const CreateKey: React.FC<CreateKeyProps> = ({
const [apiKey, setApiKey] = useState(null);
const [softBudget, setSoftBudget] = useState(null);
const [userModels, setUserModels] = useState([]);
const [modelsToPick, setModelsToPick] = useState([]);
const handleOk = () => {
setIsModalVisible(false);
form.resetFields();
@ -95,6 +96,30 @@ const CreateKey: React.FC<CreateKeyProps> = ({
message.success('API Key copied to clipboard');
};
useEffect(() => {
let tempModelsToPick = [];
if (team) {
if (team.models.length > 0) {
if (team.models.includes("all-proxy-models")) {
// if the team has all-proxy-models show all available models
tempModelsToPick = userModels;
} else {
// show team models
tempModelsToPick = team.models;
}
} else {
// show all available models if the team has no models set
tempModelsToPick = userModels;
}
} else {
// no team set, show all available models
tempModelsToPick = userModels;
}
setModelsToPick(tempModelsToPick);
}, [team, userModels]);
return (
<div>
@ -161,30 +186,15 @@ const CreateKey: React.FC<CreateKeyProps> = ({
<Option key="all-team-models" value="all-team-models">
All Team Models
</Option>
{team && team.models ? (
team.models.includes("all-proxy-models") ? (
userModels.map((model: string) => (
{
modelsToPick.map((model: string) => (
(
<Option key={model} value={model}>
{model}
</Option>
)
))
) : (
team.models.map((model: string) => (
<Option key={model} value={model}>
{model}
</Option>
))
)
) : (
userModels.map((model: string) => (
<Option key={model} value={model}>
{model}
</Option>
))
)}
}
</Select>
</Form.Item>
<Accordion className="mt-20 mb-8" >
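
For reference, the model-picking rule introduced by the `useEffect` in this diff can be read as a pure function of the selected team and the caller's available models. Below is a minimal TypeScript sketch of that rule; the helper name `getModelsToPick` and the `Team` shape are illustrative assumptions and do not appear in the commit itself.

```typescript
// Sketch only (not part of the commit): the team/model selection rule
// from the useEffect above, extracted as a pure helper.
interface Team {
  models: string[];
}

function getModelsToPick(team: Team | null, userModels: string[]): string[] {
  if (!team || team.models.length === 0) {
    // No team selected, or the team has no model restrictions:
    // fall back to every model the user has access to.
    return userModels;
  }
  if (team.models.includes("all-proxy-models")) {
    // The sentinel "all-proxy-models" means the team may use any proxy model.
    return userModels;
  }
  // Otherwise the key may only be scoped to the team's own model list.
  return team.models;
}

// Example: a team restricted to two models only offers those two options.
console.log(
  getModelsToPick({ models: ["gpt-3.5-turbo", "claude-3-haiku"] }, ["gpt-4", "gpt-3.5-turbo"])
);
// -> ["gpt-3.5-turbo", "claude-3-haiku"]
```

With the result stored in `modelsToPick` state, the render path only needs the single `modelsToPick.map(...)` shown in the last hunk, replacing the nested ternary over `team.models` and `userModels`.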