forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_common_auth_params
Commit 1841b74f49
44 changed files with 531 additions and 254 deletions
.gitignore (vendored): 1 change
@@ -51,3 +51,4 @@ loadtest_kub.yaml
 litellm/proxy/_new_secret_config.yaml
 litellm/proxy/_new_secret_config.yaml
 litellm/proxy/_super_secret_config.yaml
+litellm/proxy/_super_secret_config.yaml
@@ -227,6 +227,7 @@ curl 'http://0.0.0.0:4000/key/generate' \
 | [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ |
 | [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ |
 | [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ |
+| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ |
 | [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ |
 | [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ |
@@ -167,6 +167,9 @@ messages = [
 chat(messages)
 ```
 
+## Redacting Messages, Response Content from Langfuse Logging
+
+Set `litellm.turn_off_message_logging=True` This will prevent the messages and responses from being logged to langfuse, but request metadata will still be logged.
 
 ## Troubleshooting & Errors
 ### Data not getting logged to Langfuse ?
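For reference, a minimal SDK-side sketch of the redaction flag this hunk documents; it is an illustration only and assumes `LANGFUSE_PUBLIC_KEY`/`LANGFUSE_SECRET_KEY` and an OpenAI key are set in the environment:

```python
import litellm

# Send traces to Langfuse, but redact prompt/response content.
litellm.success_callback = ["langfuse"]
litellm.turn_off_message_logging = True  # only request metadata is logged

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    metadata={"trace_id": "my-trace-id"},  # metadata is still visible in Langfuse
)
print(response.choices[0].message.content)
```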
@@ -40,5 +40,9 @@ response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content
 print(response)
 ```
 
+## Redacting Messages, Response Content from Sentry Logging
+
+Set `litellm.turn_off_message_logging=True` This will prevent the messages and responses from being logged to sentry, but request metadata will still be logged.
+
 [Let us know](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+) if you need any additional options from Sentry.
 
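A comparable sketch for Sentry; Sentry is typically registered as a failure callback in litellm, so treat the callback name and the `SENTRY_DSN` variable here as assumptions to verify against the page's quick start:

```python
import os
import litellm

os.environ["SENTRY_DSN"] = "your-sentry-dsn"  # placeholder value

litellm.failure_callback = ["sentry"]    # report failed LLM calls to Sentry (assumed callback name)
litellm.turn_off_message_logging = True  # redact prompts/responses in the reported event

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        api_key="bad-key",  # force a failure so an event is sent
    )
except Exception:
    pass  # the failure is still captured by the Sentry callback
```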
@@ -20,7 +20,7 @@ See [here](https://cloud.ibm.com/apidocs/watsonx-ai#api-authentication) for more
 
 ## Usage
 
-<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/LiteLLM_IBM_Watsonx.ipynb">
+<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_IBM_Watsonx.ipynb">
   <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
 </a>
 
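As a companion to the linked notebook, a minimal completion sketch for the watsonx provider; the model id and credential variable names below are assumptions based on the provider docs linked above, so check that page for the exact values:

```python
import os
from litellm import completion

# Assumed credential variables for watsonx.ai; verify the names against the provider docs.
os.environ["WATSONX_URL"] = "https://us-south.ml.cloud.ibm.com"
os.environ["WATSONX_APIKEY"] = "your-ibm-cloud-api-key"
os.environ["WATSONX_PROJECT_ID"] = "your-project-id"

response = completion(
    model="watsonx/ibm/granite-13b-chat-v2",  # hypothetical model id for illustration
    messages=[{"role": "user", "content": "Hello from litellm + watsonx"}],
    max_tokens=80,
)
print(response.choices[0].message.content)
```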
@@ -1,13 +1,13 @@
-# Slack Alerting
+# 🚨 Alerting
 
 Get alerts for:
-- hanging LLM api calls
-- failed LLM api calls
-- slow LLM api calls
-- budget Tracking per key/user:
+- Hanging LLM api calls
+- Failed LLM api calls
+- Slow LLM api calls
+- Budget Tracking per key/user:
   - When a User/Key crosses their Budget
   - When a User/Key is 15% away from crossing their Budget
-- failed db read/writes
+- Failed db read/writes
 
 ## Quick Start
 
@@ -569,6 +569,22 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
 
 All requests made with these keys will log data to their team-specific logging.
 
+### Redacting Messages, Response Content from Langfuse Logging
+
+Set `litellm.turn_off_message_logging=True` This will prevent the messages and responses from being logged to langfuse, but request metadata will still be logged.
+
+```yaml
+model_list:
+ - model_name: gpt-3.5-turbo
+   litellm_params:
+     model: gpt-3.5-turbo
+litellm_settings:
+  success_callback: ["langfuse"]
+  turn_off_message_logging: True
+```
+
+
 
 ## Logging Proxy Input/Output - DataDog
 We will use the `--config` to set `litellm.success_callback = ["datadog"]` this will log all successfull LLM calls to DataDog
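A quick client-side sketch for exercising the config above; it assumes the proxy is running locally on port 4000 with a virtual key, and points the OpenAI SDK at the proxy:

```python
import openai

# Point the OpenAI SDK at the LiteLLM proxy (assumed to be running on localhost:4000).
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this prompt should be redacted in Langfuse"}],
)
print(response.choices[0].message.content)
# With turn_off_message_logging: True, the Langfuse trace keeps the request metadata,
# but input/output content is replaced with "redacted-by-litellm" (see the test added
# later in this commit).
```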
@@ -43,6 +43,12 @@ const sidebars = {
         "proxy/user_keys",
         "proxy/enterprise",
         "proxy/virtual_keys",
+        "proxy/alerting",
+        {
+          type: "category",
+          label: "Logging",
+          items: ["proxy/logging", "proxy/streaming_logging"],
+        },
         "proxy/team_based_routing",
         "proxy/ui",
         "proxy/cost_tracking",
@@ -58,11 +64,6 @@ const sidebars = {
         "proxy/pii_masking",
         "proxy/prompt_injection",
         "proxy/caching",
-        {
-          type: "category",
-          label: "Logging, Alerting",
-          items: ["proxy/logging", "proxy/alerting", "proxy/streaming_logging"],
-        },
         "proxy/prometheus",
         "proxy/call_hooks",
         "proxy/rules",
@@ -45,6 +45,7 @@ _async_failure_callback: List[Callable] = (
 )  # internal variable - async custom callbacks are routed here.
 pre_call_rules: List[Callable] = []
 post_call_rules: List[Callable] = []
+turn_off_message_logging: Optional[bool] = False
 ## end of callbacks #############
 
 email: Optional[str] = (
@@ -12,7 +12,9 @@ import litellm
 
 class LangFuseLogger:
     # Class variables or attributes
-    def __init__(self, langfuse_public_key=None, langfuse_secret=None):
+    def __init__(
+        self, langfuse_public_key=None, langfuse_secret=None, flush_interval=1
+    ):
         try:
             from langfuse import Langfuse
         except Exception as e:
@@ -31,7 +33,7 @@ class LangFuseLogger:
                 host=self.langfuse_host,
                 release=self.langfuse_release,
                 debug=self.langfuse_debug,
-                flush_interval=1,  # flush interval in seconds
+                flush_interval=flush_interval,  # flush interval in seconds
             )
 
         # set the current langfuse project id in the environ
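For context, a minimal sketch of how a caller can now tune the flush interval; this mirrors the `SlackAlerting.__init__` change later in this commit and assumes `LANGFUSE_PUBLIC_KEY`/`LANGFUSE_SECRET_KEY` are available in the environment:

```python
import os
from litellm.integrations.langfuse import LangFuseLogger

# flush_interval=1 makes the underlying Langfuse client flush events every second,
# which the alerting code below relies on for near-real-time traces.
langfuse_logger = LangFuseLogger(
    os.getenv("LANGFUSE_PUBLIC_KEY"),
    os.getenv("LANGFUSE_SECRET_KEY"),
    flush_interval=1,
)
```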
@@ -12,6 +12,7 @@ from litellm.caching import DualCache
 import asyncio
 import aiohttp
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+import datetime
 
 
 class SlackAlerting:
@@ -47,6 +48,18 @@ class SlackAlerting:
         self.internal_usage_cache = DualCache()
         self.async_http_handler = AsyncHTTPHandler()
         self.alert_to_webhook_url = alert_to_webhook_url
+        self.langfuse_logger = None
+
+        try:
+            from litellm.integrations.langfuse import LangFuseLogger
+
+            self.langfuse_logger = LangFuseLogger(
+                os.getenv("LANGFUSE_PUBLIC_KEY"),
+                os.getenv("LANGFUSE_SECRET_KEY"),
+                flush_interval=1,
+            )
+        except:
+            pass
         pass
 
@@ -93,39 +106,68 @@ class SlackAlerting:
         request_info: str,
         request_data: Optional[dict] = None,
         kwargs: Optional[dict] = None,
+        type: Literal["hanging_request", "slow_response"] = "hanging_request",
+        start_time: Optional[datetime.datetime] = None,
+        end_time: Optional[datetime.datetime] = None,
     ):
         import uuid
 
         # For now: do nothing as we're debugging why this is not working as expected
+        if request_data is not None:
+            trace_id = request_data.get("metadata", {}).get(
+                "trace_id", None
+            )  # get langfuse trace id
+            if trace_id is None:
+                trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
+                request_data["metadata"]["trace_id"] = trace_id
+        elif kwargs is not None:
+            _litellm_params = kwargs.get("litellm_params", {})
+            trace_id = _litellm_params.get("metadata", {}).get(
+                "trace_id", None
+            )  # get langfuse trace id
+            if trace_id is None:
+                trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
+                _litellm_params["metadata"]["trace_id"] = trace_id
+
+        # Log hanging request as an error on langfuse
+        if type == "hanging_request":
+            if self.langfuse_logger is not None:
+                _logging_kwargs = copy.deepcopy(request_data)
+                if _logging_kwargs is None:
+                    _logging_kwargs = {}
+                _logging_kwargs["litellm_params"] = {}
+                request_data = request_data or {}
+                _logging_kwargs["litellm_params"]["metadata"] = request_data.get(
+                    "metadata", {}
+                )
+                # log to langfuse in a separate thread
+                import threading
+
+                threading.Thread(
+                    target=self.langfuse_logger.log_event,
+                    args=(
+                        _logging_kwargs,
+                        None,
+                        start_time,
+                        end_time,
+                        None,
+                        print,
+                        "ERROR",
+                        "Requests is hanging",
+                    ),
+                ).start()
+
+        _langfuse_host = os.environ.get("LANGFUSE_HOST", "https://cloud.langfuse.com")
+        _langfuse_project_id = os.environ.get("LANGFUSE_PROJECT_ID")
+
+        # langfuse urls look like: https://us.cloud.langfuse.com/project/************/traces/litellm-alert-trace-ididi9dk-09292-************
+
+        _langfuse_url = (
+            f"{_langfuse_host}/project/{_langfuse_project_id}/traces/{trace_id}"
+        )
+        request_info += f"\n🪢 Langfuse Trace: {_langfuse_url}"
         return request_info
 
-        # if request_data is not None:
-        #     trace_id = request_data.get("metadata", {}).get(
-        #         "trace_id", None
-        #     )  # get langfuse trace id
-        #     if trace_id is None:
-        #         trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
-        #         request_data["metadata"]["trace_id"] = trace_id
-        # elif kwargs is not None:
-        #     _litellm_params = kwargs.get("litellm_params", {})
-        #     trace_id = _litellm_params.get("metadata", {}).get(
-        #         "trace_id", None
-        #     )  # get langfuse trace id
-        #     if trace_id is None:
-        #         trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
-        #         _litellm_params["metadata"]["trace_id"] = trace_id
-
-        # _langfuse_host = os.environ.get("LANGFUSE_HOST", "https://cloud.langfuse.com")
-        # _langfuse_project_id = os.environ.get("LANGFUSE_PROJECT_ID")
-
-        # # langfuse urls look like: https://us.cloud.langfuse.com/project/************/traces/litellm-alert-trace-ididi9dk-09292-************
-
-        # _langfuse_url = (
-        #     f"{_langfuse_host}/project/{_langfuse_project_id}/traces/{trace_id}"
-        # )
-        # request_info += f"\n🪢 Langfuse Trace: {_langfuse_url}"
-        # return request_info
 
     def _response_taking_too_long_callback(
         self,
         kwargs,  # kwargs to completion
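For orientation, a tiny sketch of the trace link the alert now carries, mirroring the URL construction in the hunk above (illustrative only; `LANGFUSE_HOST` and `LANGFUSE_PROJECT_ID` are read from the environment exactly as in the diff):

```python
import os
import uuid

# Mirror of the URL construction added above, for illustration.
trace_id = "litellm-alert-trace-" + str(uuid.uuid4())
_langfuse_host = os.environ.get("LANGFUSE_HOST", "https://cloud.langfuse.com")
_langfuse_project_id = os.environ.get("LANGFUSE_PROJECT_ID")

langfuse_url = f"{_langfuse_host}/project/{_langfuse_project_id}/traces/{trace_id}"
print(f"🪢 Langfuse Trace: {langfuse_url}")
```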
@@ -167,6 +209,14 @@ class SlackAlerting:
         _deployment_latencies = metadata["_latency_per_deployment"]
         if len(_deployment_latencies) == 0:
             return None
+        try:
+            # try sorting deployments by latency
+            _deployment_latencies = sorted(
+                _deployment_latencies.items(), key=lambda x: x[1]
+            )
+            _deployment_latencies = dict(_deployment_latencies)
+        except:
+            pass
         for api_base, latency in _deployment_latencies.items():
             _message_to_send += f"\n{api_base}: {round(latency,2)}s"
         _message_to_send = "```" + _message_to_send + "```"
@@ -194,7 +244,7 @@ class SlackAlerting:
         if time_difference_float > self.alerting_threshold:
             if "langfuse" in litellm.success_callback:
                 request_info = self._add_langfuse_trace_id_to_alert(
-                    request_info=request_info, kwargs=kwargs
+                    request_info=request_info, kwargs=kwargs, type="slow_response"
                 )
             # add deployment latencies to alert
             if (
@@ -222,8 +272,8 @@ class SlackAlerting:
 
     async def response_taking_too_long(
         self,
-        start_time: Optional[float] = None,
-        end_time: Optional[float] = None,
+        start_time: Optional[datetime.datetime] = None,
+        end_time: Optional[datetime.datetime] = None,
         type: Literal["hanging_request", "slow_response"] = "hanging_request",
         request_data: Optional[dict] = None,
     ):
@@ -243,10 +293,6 @@ class SlackAlerting:
             except:
                 messages = ""
             request_info = f"\nRequest Model: `{model}`\nMessages: `{messages}`"
-            if "langfuse" in litellm.success_callback:
-                request_info = self._add_langfuse_trace_id_to_alert(
-                    request_info=request_info, request_data=request_data
-                )
         else:
             request_info = ""
 
@@ -288,6 +334,15 @@ class SlackAlerting:
                     f"`Requests are hanging - {self.alerting_threshold}s+ request time`"
                 )
+
+                if "langfuse" in litellm.success_callback:
+                    request_info = self._add_langfuse_trace_id_to_alert(
+                        request_info=request_info,
+                        request_data=request_data,
+                        type="hanging_request",
+                        start_time=start_time,
+                        end_time=end_time,
+                    )
 
                 # add deployment latencies to alert
                 _deployment_latency_map = self._get_deployment_latencies_to_alert(
                     metadata=request_data.get("metadata", {})
@@ -447,6 +447,7 @@ class OpenAIChatCompletion(BaseLLM):
             )
         else:
             openai_aclient = client
+
         ## LOGGING
         logging_obj.pre_call(
             input=data["messages"],
@@ -1039,6 +1039,7 @@ async def async_streaming(
     instances=None,
     vertex_project=None,
     vertex_location=None,
+    safety_settings=None,
     **optional_params,
 ):
     """
@@ -1065,6 +1066,7 @@ async def async_streaming(
         response = await llm_model._generate_content_streaming_async(
             contents=content,
             generation_config=optional_params,
+            safety_settings=safety_settings,
             tools=tools,
         )
 
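A hedged usage sketch for the parameter threaded through above: `safety_settings` is forwarded to the Vertex AI SDK during streaming calls. The model string and the category/threshold names come from Google's generative AI SDK and should be verified against its docs; Vertex project credentials are assumed to be configured:

```python
from litellm import completion

# Safety settings are passed through to the Vertex AI SDK.
response = completion(
    model="vertex_ai/gemini-pro",  # example Vertex model for illustration
    messages=[{"role": "user", "content": "Tell me a short story"}],
    stream=True,
    safety_settings=[
        {
            "category": "HARM_CATEGORY_HARASSMENT",
            "threshold": "BLOCK_ONLY_HIGH",
        }
    ],
)
for chunk in response:
    print(chunk)
```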
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[27125,[\"447\",\"static/chunks/447-9f8d32190ff7d16d.js\",\"931\",\"static/chunks/app/page-781ca5f151d78d1d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"PtTtxXIYvdjQsvRgdITlk\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[27125,[\"447\",\"static/chunks/447-9f8d32190ff7d16d.js\",\"931\",\"static/chunks/app/page-508c39694bd40fe9.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"kbGdRQFfI6W3bEwfzmJDI\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
@@ -1,7 +1,7 @@
2:I[77831,[],""]
-3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-781ca5f151d78d1d.js"],""]
+3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-508c39694bd40fe9.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["PtTtxXIYvdjQsvRgdITlk",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["kbGdRQFfI6W3bEwfzmJDI",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@@ -1,23 +1,23 @@
 model_list:
-- model_name: text-embedding-3-small
-  litellm_params:
-    model: text-embedding-3-small
-- model_name: whisper
-  litellm_params:
-    model: azure/azure-whisper
-    api_version: 2024-02-15-preview
-    api_base: os.environ/AZURE_EUROPE_API_BASE
-    api_key: os.environ/AZURE_EUROPE_API_KEY
-  model_info:
-    mode: audio_transcription
 - litellm_params:
-    model: gpt-4
-  model_name: gpt-4
-- model_name: azure-mistral
-  litellm_params:
-    model: azure/mistral-large-latest
-    api_base: https://Mistral-large-nmefg-serverless.eastus2.inference.ai.azure.com
-    api_key: os.environ/AZURE_MISTRAL_API_KEY
-
-# litellm_settings:
-#   cache: True
+    api_base: http://0.0.0.0:8080
+    api_key: my-fake-key
+    model: openai/my-fake-model
+  model_name: fake-openai-endpoint
+- litellm_params:
+    api_base: http://0.0.0.0:8080
+    api_key: my-fake-key
+    model: openai/my-fake-model-2
+  model_name: fake-openai-endpoint
+- litellm_params:
+    api_base: http://0.0.0.0:8080
+    api_key: my-fake-key
+    model: openai/my-fake-model-3
+  model_name: fake-openai-endpoint
+- litellm_params:
+    api_base: http://0.0.0.0:8080
+    api_key: my-fake-key
+    model: openai/my-fake-model-4
+  model_name: fake-openai-endpoint
+
+router_settings:
+  num_retries: 0
@@ -95,7 +95,15 @@ def common_checks(
                 f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}"
             )
     # 7. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
-    if litellm.max_budget > 0 and global_proxy_spend is not None:
+    if (
+        litellm.max_budget > 0
+        and global_proxy_spend is not None
+        # only run global budget checks for OpenAI routes
+        # Reason - the Admin UI should continue working if the proxy crosses it's global budget
+        and route in LiteLLMRoutes.openai_routes.value
+        and route != "/v1/models"
+        and route != "/models"
+    ):
         if global_proxy_spend > litellm.max_budget:
             raise Exception(
                 f"ExceededBudget: LiteLLM Proxy has exceeded its budget. Current spend: {global_proxy_spend}; Max Budget: {litellm.max_budget}"
@@ -50,7 +50,6 @@ class Router:
     model_names: List = []
     cache_responses: Optional[bool] = False
     default_cache_time_seconds: int = 1 * 60 * 60  # 1 hour
-    num_retries: int = 0
     tenacity = None
     leastbusy_logger: Optional[LeastBusyLoggingHandler] = None
     lowesttpm_logger: Optional[LowestTPMLoggingHandler] = None
@@ -70,9 +69,11 @@ class Router:
         ] = None,  # if you want to cache across model groups
         client_ttl: int = 3600,  # ttl for cached clients - will re-initialize after this time in seconds
         ## RELIABILITY ##
-        num_retries: int = 0,
+        num_retries: Optional[int] = None,
         timeout: Optional[float] = None,
-        default_litellm_params={},  # default params for Router.chat.completion.create
+        default_litellm_params: Optional[
+            dict
+        ] = None,  # default params for Router.chat.completion.create
         default_max_parallel_requests: Optional[int] = None,
         set_verbose: bool = False,
         debug_level: Literal["DEBUG", "INFO"] = "INFO",
@@ -158,6 +159,7 @@ class Router:
         router = Router(model_list=model_list, fallbacks=[{"azure-gpt-3.5-turbo": "openai-gpt-3.5-turbo"}])
         ```
         """
+
         if semaphore:
             self.semaphore = semaphore
         self.set_verbose = set_verbose
@@ -229,7 +231,14 @@ class Router:
         self.failed_calls = (
             InMemoryCache()
         )  # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown
-        self.num_retries = num_retries or litellm.num_retries or 0
+
+        if num_retries is not None:
+            self.num_retries = num_retries
+        elif litellm.num_retries is not None:
+            self.num_retries = litellm.num_retries
+        else:
+            self.num_retries = openai.DEFAULT_MAX_RETRIES
+
         self.timeout = timeout or litellm.request_timeout
 
         self.retry_after = retry_after
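A small sketch of the resolution order this hunk introduces (it mirrors the `test_router_num_retries_init` test added later in this diff); the model entry and key are placeholders, and the last assertion assumes `litellm.num_retries` has not been set elsewhere:

```python
import openai
from litellm import Router

model_list = [
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "sk-..."},  # placeholder key
    }
]

# An explicit value wins.
router = Router(model_list=model_list, num_retries=2)
assert router.num_retries == 2

# With nothing set, the router now falls back to the OpenAI SDK default
# instead of silently using 0.
router = Router(model_list=model_list)
assert router.num_retries == openai.DEFAULT_MAX_RETRIES
```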
@@ -255,6 +264,7 @@ class Router:
         )  # dict to store aliases for router, ex. {"gpt-4": "gpt-3.5-turbo"}, all requests with gpt-4 -> get routed to gpt-3.5-turbo group
 
         # make Router.chat.completions.create compatible for openai.chat.completions.create
+        default_litellm_params = default_litellm_params or {}
         self.chat = litellm.Chat(params=default_litellm_params, router_obj=self)
 
         # default litellm args
@@ -428,6 +438,7 @@ class Router:
         kwargs["messages"] = messages
         kwargs["original_function"] = self._acompletion
         kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
+
         timeout = kwargs.get("request_timeout", self.timeout)
         kwargs.setdefault("metadata", {}).update({"model_group": model})
 
@@ -469,6 +480,7 @@ class Router:
             )
             kwargs["model_info"] = deployment.get("model_info", {})
             data = deployment["litellm_params"].copy()
+
             model_name = data["model"]
             for k, v in self.default_litellm_params.items():
                 if (
@@ -1415,10 +1427,12 @@ class Router:
         context_window_fallbacks = kwargs.pop(
             "context_window_fallbacks", self.context_window_fallbacks
         )
-        verbose_router_logger.debug(
-            f"async function w/ retries: original_function - {original_function}"
-        )
         num_retries = kwargs.pop("num_retries")
+
+        verbose_router_logger.debug(
+            f"async function w/ retries: original_function - {original_function}, num_retries - {num_retries}"
+        )
         try:
             # if the function call is successful, no exception will be raised and we'll break out of the loop
             response = await original_function(*args, **kwargs)
@@ -1445,6 +1459,7 @@ class Router:
                     await asyncio.sleep(timeout)
                 elif RouterErrors.user_defined_ratelimit_error.value in str(e):
                     raise e  # don't wait to retry if deployment hits user-defined rate-limit
+
                 elif hasattr(original_exception, "status_code") and litellm._should_retry(
                     status_code=original_exception.status_code
                 ):
@@ -1606,6 +1621,28 @@ class Router:
                 raise e
         raise original_exception
 
+    def _router_should_retry(
+        self, e: Exception, remaining_retries: int, num_retries: int
+    ):
+        """
+        Calculate back-off, then retry
+        """
+        if hasattr(e, "response") and hasattr(e.response, "headers"):
+            timeout = litellm._calculate_retry_after(
+                remaining_retries=remaining_retries,
+                max_retries=num_retries,
+                response_headers=e.response.headers,
+                min_timeout=self.retry_after,
+            )
+            time.sleep(timeout)
+        else:
+            timeout = litellm._calculate_retry_after(
+                remaining_retries=remaining_retries,
+                max_retries=num_retries,
+                min_timeout=self.retry_after,
+            )
+            time.sleep(timeout)
+
     def function_with_retries(self, *args, **kwargs):
         """
         Try calling the model 3 times. Shuffle between available deployments.
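To make the behavioral change concrete, a hedged usage sketch: with the helper above, a failing deployment now triggers a back-off (based on Retry-After-style response headers when available, otherwise `retry_after`) before and between retry attempts, rather than retrying immediately. The model entry and key below are placeholders:

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "bad-key"},  # placeholder
        }
    ],
    num_retries=2,
    retry_after=1,  # minimum back-off in seconds between attempts
)

try:
    router.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
    )
except Exception as e:
    print(f"failed after retries: {e}")
```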
@@ -1625,9 +1662,6 @@ class Router:
                 return response
             except Exception as e:
                 original_exception = e
-                verbose_router_logger.debug(
-                    f"num retries in function with retries: {num_retries}"
-                )
                 ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR
                 if (
                     isinstance(original_exception, litellm.ContextWindowExceededError)
@@ -1641,6 +1675,11 @@ class Router:
                 if num_retries > 0:
                     kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
                 ### RETRY
+                self._router_should_retry(
+                    e=original_exception,
+                    remaining_retries=num_retries,
+                    num_retries=num_retries,
+                )
                 for current_attempt in range(num_retries):
                     verbose_router_logger.debug(
                         f"retrying request. Current attempt - {current_attempt}; retries left: {num_retries}"
@@ -1654,34 +1693,11 @@ class Router:
                     ## LOGGING
                     kwargs = self.log_retry(kwargs=kwargs, e=e)
                     remaining_retries = num_retries - current_attempt
-                    if "No models available" in str(e):
-                        timeout = litellm._calculate_retry_after(
-                            remaining_retries=remaining_retries,
-                            max_retries=num_retries,
-                            min_timeout=self.retry_after,
-                        )
-                        time.sleep(timeout)
-                    elif (
-                        hasattr(e, "status_code")
-                        and hasattr(e, "response")
-                        and litellm._should_retry(status_code=e.status_code)
-                    ):
-                        if hasattr(e.response, "headers"):
-                            timeout = litellm._calculate_retry_after(
-                                remaining_retries=remaining_retries,
-                                max_retries=num_retries,
-                                response_headers=e.response.headers,
-                                min_timeout=self.retry_after,
-                            )
-                        else:
-                            timeout = litellm._calculate_retry_after(
-                                remaining_retries=remaining_retries,
-                                max_retries=num_retries,
-                                min_timeout=self.retry_after,
-                            )
-                        time.sleep(timeout)
-                    else:
-                        raise e
+                    self._router_should_retry(
+                        e=e,
+                        remaining_retries=remaining_retries,
+                        num_retries=num_retries,
+                    )
             raise original_exception
 
     ### HELPER FUNCTIONS
@@ -1979,6 +1995,8 @@ class Router:
             # check if it ends with a trailing slash
             if api_base.endswith("/"):
                 api_base += "v1/"
+            elif api_base.endswith("/v1"):
+                api_base += "/"
             else:
                 api_base += "/v1/"
 
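The effect of the new branch, shown as a tiny standalone sketch (illustrative only, not the library API): an Azure AI Studio `api_base` that already ends in `/v1` no longer gets a second `v1` appended, which is what the `uri_reference.count("v1") == 1` assertion added to the tests below checks. The hostname is a made-up example:

```python
def normalize_api_base(api_base: str) -> str:
    # Mirrors the branch added above, for illustration.
    if api_base.endswith("/"):
        return api_base + "v1/"
    elif api_base.endswith("/v1"):
        return api_base + "/"
    else:
        return api_base + "/v1/"

assert (
    normalize_api_base("https://example-host.inference.ai.azure.com/v1")
    == "https://example-host.inference.ai.azure.com/v1/"
)
```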
@@ -2004,7 +2022,9 @@ class Router:
                 stream_timeout = litellm.get_secret(stream_timeout_env_name)
                 litellm_params["stream_timeout"] = stream_timeout
 
-            max_retries = litellm_params.pop("max_retries", 2)
+            max_retries = litellm_params.pop(
+                "max_retries", 0
+            )  # router handles retry logic
             if isinstance(max_retries, str) and max_retries.startswith("os.environ/"):
                 max_retries_env_name = max_retries.replace("os.environ/", "")
                 max_retries = litellm.get_secret(max_retries_env_name)
@@ -119,7 +119,9 @@ def test_multiple_deployments_parallel():
 
 
 # test_multiple_deployments_parallel()
-def test_cooldown_same_model_name():
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_cooldown_same_model_name(sync_mode):
     # users could have the same model with different api_base
     # example
     # azure/chatgpt, api_base: 1234
@@ -161,6 +163,7 @@ def test_cooldown_same_model_name():
             num_retries=3,
         )  # type: ignore
 
+        if sync_mode:
             response = router.completion(
                 model="gpt-3.5-turbo",
                 messages=[{"role": "user", "content": "hello this request will pass"}],
@@ -176,6 +179,23 @@ def test_cooldown_same_model_name():
                 model_ids[0] != model_ids[1]
             )  # ensure both models have a uuid added, and they have different names
 
+            print("\ngot response\n", response)
+        else:
+            response = await router.acompletion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "hello this request will pass"}],
+            )
+            print(router.model_list)
+            model_ids = []
+            for model in router.model_list:
+                model_ids.append(model["model_info"]["id"])
+            print("\n litellm model ids ", model_ids)
+
+            # example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
+            assert (
+                model_ids[0] != model_ids[1]
+            )  # ensure both models have a uuid added, and they have different names
+
         print("\ngot response\n", response)
     except Exception as e:
         pytest.fail(f"Got unexpected exception on router! - {e}")
@@ -161,40 +161,54 @@ async def make_async_calls():
     return total_time
 
 
-# def test_langfuse_logging_async_text_completion():
-#     try:
-#         pre_langfuse_setup()
-#         litellm.set_verbose = False
-#         litellm.success_callback = ["langfuse"]
-
-#         async def _test_langfuse():
-#             response = await litellm.atext_completion(
-#                 model="gpt-3.5-turbo-instruct",
-#                 prompt="this is a test",
-#                 max_tokens=5,
-#                 temperature=0.7,
-#                 timeout=5,
-#                 user="test_user",
-#                 stream=True
-#             )
-#             async for chunk in response:
-#                 print()
-#                 print(chunk)
-#             await asyncio.sleep(1)
-#             return response
-
-#         response = asyncio.run(_test_langfuse())
-#         print(f"response: {response}")
-
-#         # # check langfuse.log to see if there was a failed response
-#         search_logs("langfuse.log")
-#     except litellm.Timeout as e:
-#         pass
-#     except Exception as e:
-#         pytest.fail(f"An exception occurred - {e}")
-
-
-# test_langfuse_logging_async_text_completion()
+@pytest.mark.asyncio
+@pytest.mark.parametrize("stream", [False, True])
+async def test_langfuse_logging_without_request_response(stream):
+    try:
+        import uuid
+
+        _unique_trace_name = f"litellm-test-{str(uuid.uuid4())}"
+        litellm.set_verbose = True
+        litellm.turn_off_message_logging = True
+        litellm.success_callback = ["langfuse"]
+        response = await litellm.acompletion(
+            model="gpt-3.5-turbo",
+            mock_response="It's simple to use and easy to get started",
+            messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}],
+            max_tokens=10,
+            temperature=0.2,
+            stream=stream,
+            metadata={"trace_id": _unique_trace_name},
+        )
+        print(response)
+        if stream:
+            async for chunk in response:
+                print(chunk)
+
+        await asyncio.sleep(3)
+
+        import langfuse
+
+        langfuse_client = langfuse.Langfuse(
+            public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
+            secret_key=os.environ["LANGFUSE_SECRET_KEY"],
+        )
+
+        # get trace with _unique_trace_name
+        trace = langfuse_client.get_generations(trace_id=_unique_trace_name)
+
+        print("trace_from_langfuse", trace)
+
+        _trace_data = trace.data
+
+        assert _trace_data[0].input == {"messages": "redacted-by-litellm"}
+        assert _trace_data[0].output == {
+            "role": "assistant",
+            "content": "redacted-by-litellm",
+        }
+
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {e}")
+
+
 @pytest.mark.skip(reason="beta test - checking langfuse output")
@@ -2730,7 +2730,6 @@ async def test_acompletion_watsonx():
             messages=messages,
             temperature=0.2,
             max_tokens=80,
-            space_id=os.getenv("WATSONX_SPACE_ID_TEST"),
         )
         # Add any assertions here to check the response
         print(response)
@@ -483,6 +483,8 @@ def test_mistral_embeddings():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
+
+@pytest.mark.skip(reason="local test")
 def test_watsonx_embeddings():
     try:
         litellm.set_verbose = True
@@ -201,6 +201,7 @@ async def test_router_atext_completion_streaming():
 
 @pytest.mark.asyncio
 async def test_router_completion_streaming():
+    litellm.set_verbose = True
     messages = [
         {"role": "user", "content": "Hello, can you generate a 500 words poem?"}
     ]
@@ -219,9 +220,9 @@ async def test_router_completion_streaming():
         {
             "model_name": "azure-model",
             "litellm_params": {
-                "model": "azure/gpt-35-turbo",
-                "api_key": "os.environ/AZURE_EUROPE_API_KEY",
-                "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com",
+                "model": "azure/gpt-turbo",
+                "api_key": "os.environ/AZURE_FRANCE_API_KEY",
+                "api_base": "https://openai-france-1234.openai.azure.com",
                 "rpm": 6,
             },
             "model_info": {"id": 2},
@@ -229,9 +230,9 @@ async def test_router_completion_streaming():
         {
             "model_name": "azure-model",
             "litellm_params": {
-                "model": "azure/gpt-35-turbo",
-                "api_key": "os.environ/AZURE_CANADA_API_KEY",
-                "api_base": "https://my-endpoint-canada-berri992.openai.azure.com",
+                "model": "azure/gpt-turbo",
+                "api_key": "os.environ/AZURE_FRANCE_API_KEY",
+                "api_base": "https://openai-france-1234.openai.azure.com",
                 "rpm": 6,
             },
             "model_info": {"id": 3},
@@ -262,4 +263,4 @@ async def test_router_completion_streaming():
     ## check if calls equally distributed
     cache_dict = router.cache.get_cache(key=cache_key)
     for k, v in cache_dict.items():
-        assert v == 1
+        assert v == 1, f"Failed. K={k} called v={v} times, cache_dict={cache_dict}"
@@ -1,7 +1,7 @@
 #### What this tests ####
 # This tests litellm router
 
-import sys, os, time
+import sys, os, time, openai
 import traceback, asyncio
 import pytest
 
@@ -19,6 +19,45 @@ import os, httpx
 load_dotenv()
 
 
+@pytest.mark.parametrize("num_retries", [None, 2])
+@pytest.mark.parametrize("max_retries", [None, 4])
+def test_router_num_retries_init(num_retries, max_retries):
+    """
+    - test when num_retries set v/s not
+    - test client value when max retries set v/s not
+    """
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",  # openai model name
+                "litellm_params": {  # params for litellm completion/embedding call
+                    "model": "azure/chatgpt-v-2",
+                    "api_key": "bad-key",
+                    "api_version": os.getenv("AZURE_API_VERSION"),
+                    "api_base": os.getenv("AZURE_API_BASE"),
+                    "max_retries": max_retries,
+                },
+                "model_info": {"id": 12345},
+            },
+        ],
+        num_retries=num_retries,
+    )
+
+    if num_retries is not None:
+        assert router.num_retries == num_retries
+    else:
+        assert router.num_retries == openai.DEFAULT_MAX_RETRIES
+
+    model_client = router._get_client(
+        {"model_info": {"id": 12345}}, client_type="async", kwargs={}
+    )
+
+    if max_retries is not None:
+        assert getattr(model_client, "max_retries") == max_retries
+    else:
+        assert getattr(model_client, "max_retries") == 0
+
+
 @pytest.mark.parametrize(
     "timeout", [10, 1.0, httpx.Timeout(timeout=300.0, connect=20.0)]
 )
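The new `test_router_num_retries_init` pins down two defaults: `Router.num_retries` falls back to `openai.DEFAULT_MAX_RETRIES` when no value is passed, and the cached deployment client only gets a non-zero `max_retries` if `litellm_params` sets one. A minimal sketch of the first behavior, assuming only the `Router` constructor exercised by the test (illustrative, not part of this diff):

```python
# Hedged sketch: Router-level retries fall back to the OpenAI SDK default when unset.
import openai
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "bad-key"},
            "model_info": {"id": 1},
        }
    ]
)  # no num_retries argument

assert router.num_retries == openai.DEFAULT_MAX_RETRIES  # i.e. 2
```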
@@ -99,6 +138,7 @@ def test_router_azure_ai_studio_init(mistral_api_base):
     print(f"uri_reference: {uri_reference}")
 
     assert "/v1/" in uri_reference
+    assert uri_reference.count("v1") == 1
 
 
 def test_exception_raising():
@@ -258,6 +258,7 @@ def test_sync_fallbacks_embeddings():
         model_list=model_list,
         fallbacks=[{"bad-azure-embedding-model": ["good-azure-embedding-model"]}],
         set_verbose=False,
+        num_retries=0,
     )
     customHandler = MyCustomHandler()
     litellm.callbacks = [customHandler]
@@ -393,7 +394,7 @@ def test_dynamic_fallbacks_sync():
         },
     ]
 
-    router = Router(model_list=model_list, set_verbose=True)
+    router = Router(model_list=model_list, set_verbose=True, num_retries=0)
     kwargs = {}
     kwargs["model"] = "azure/gpt-3.5-turbo"
     kwargs["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
@@ -830,6 +831,7 @@ def test_usage_based_routing_fallbacks():
         routing_strategy="usage-based-routing",
         redis_host=os.environ["REDIS_HOST"],
         redis_port=os.environ["REDIS_PORT"],
+        num_retries=0,
     )
 
     messages = [
@@ -203,7 +203,7 @@ def test_timeouts_router():
             },
         },
     ]
-    router = Router(model_list=model_list)
+    router = Router(model_list=model_list, num_retries=0)
 
     print("PASSED !")
 
@@ -396,7 +396,9 @@ def test_router_init_gpt_4_vision_enhancements():
         pytest.fail(f"Error occurred: {e}")
 
 
-def test_openai_with_organization():
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_openai_with_organization(sync_mode):
     try:
         print("Testing OpenAI with organization")
         model_list = [
@@ -418,6 +420,7 @@ def test_openai_with_organization():
         print(router.model_list)
         print(router.model_list[0])
 
+        if sync_mode:
             openai_client = router._get_client(
                 deployment=router.model_list[0],
                 kwargs={"input": ["hello"], "model": "openai-bad-org"},
@@ -433,7 +436,9 @@ def test_openai_with_organization():
                     model="openai-bad-org",
                     messages=[{"role": "user", "content": "this is a test"}],
                 )
-                pytest.fail("Request should have failed - This organization does not exist")
+                pytest.fail(
+                    "Request should have failed - This organization does not exist"
+                )
             except Exception as e:
                 print("Got exception: " + str(e))
                 assert "No such organization: org-ikDc4ex8NB" in str(e)
@@ -444,6 +449,36 @@ def test_openai_with_organization():
                 messages=[{"role": "user", "content": "this is a test"}],
                 max_tokens=5,
             )
+        else:
+            openai_client = router._get_client(
+                deployment=router.model_list[0],
+                kwargs={"input": ["hello"], "model": "openai-bad-org"},
+                client_type="async",
+            )
+            print(vars(openai_client))
+
+            assert openai_client.organization == "org-ikDc4ex8NB"
+
+            # bad org raises error
+
+            try:
+                response = await router.acompletion(
+                    model="openai-bad-org",
+                    messages=[{"role": "user", "content": "this is a test"}],
+                )
+                pytest.fail(
+                    "Request should have failed - This organization does not exist"
+                )
+            except Exception as e:
+                print("Got exception: " + str(e))
+                assert "No such organization: org-ikDc4ex8NB" in str(e)
+
+            # good org works
+            response = await router.acompletion(
+                model="openai-good-org",
+                messages=[{"role": "user", "content": "this is a test"}],
+                max_tokens=5,
+            )
 
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
@@ -57,6 +57,7 @@ def test_router_timeouts():
         redis_password=os.getenv("REDIS_PASSWORD"),
         redis_port=int(os.getenv("REDIS_PORT")),
         timeout=10,
+        num_retries=0,
     )
 
     print("***** TPM SETTINGS *****")
@@ -89,15 +90,15 @@ def test_router_timeouts():
 
 @pytest.mark.asyncio
 async def test_router_timeouts_bedrock():
-    import openai
+    import openai, uuid
 
     # Model list for OpenAI and Anthropic models
-    model_list = [
+    _model_list = [
         {
             "model_name": "bedrock",
             "litellm_params": {
                 "model": "bedrock/anthropic.claude-instant-v1",
-                "timeout": 0.001,
+                "timeout": 0.00001,
             },
             "tpm": 80000,
         },
@@ -105,17 +106,18 @@ async def test_router_timeouts_bedrock():
 
     # Configure router
     router = Router(
-        model_list=model_list,
+        model_list=_model_list,
         routing_strategy="usage-based-routing",
        debug_level="DEBUG",
         set_verbose=True,
+        num_retries=0,
     )
 
     litellm.set_verbose = True
     try:
         response = await router.acompletion(
             model="bedrock",
-            messages=[{"role": "user", "content": "hello, who are u"}],
+            messages=[{"role": "user", "content": f"hello, who are u {uuid.uuid4()}"}],
         )
         print(response)
         pytest.fail("Did not raise error `openai.APITimeoutError`")
@@ -78,7 +78,8 @@ def test_hanging_request_azure():
                 "model_name": "openai-gpt",
                 "litellm_params": {"model": "gpt-3.5-turbo"},
             },
-        ]
+        ],
+        num_retries=0,
     )
 
     encoded = litellm.utils.encode(model="gpt-3.5-turbo", text="blue")[0]
@@ -131,7 +132,8 @@ def test_hanging_request_openai():
                 "model_name": "openai-gpt",
                 "litellm_params": {"model": "gpt-3.5-turbo"},
             },
-        ]
+        ],
+        num_retries=0,
    )
 
     encoded = litellm.utils.encode(model="gpt-3.5-turbo", text="blue")[0]
@@ -189,6 +191,7 @@ def test_timeout_streaming():
 # test_timeout_streaming()
 
 
+@pytest.mark.skip(reason="local test")
 def test_timeout_ollama():
     # this Will Raise a timeout
     import litellm
@@ -110,7 +110,7 @@ class LiteLLM_Params(BaseModel):
     stream_timeout: Optional[Union[float, str]] = (
         None  # timeout when making stream=True calls, if str, pass in as os.environ/
     )
-    max_retries: int = 2  # follows openai default of 2
+    max_retries: Optional[int] = None
     organization: Optional[str] = None  # for openai orgs
     ## VERTEX AI ##
     vertex_project: Optional[str] = None
@@ -148,9 +148,7 @@ class LiteLLM_Params(BaseModel):
         args.pop("self", None)
         args.pop("params", None)
         args.pop("__class__", None)
-        if max_retries is None:
-            max_retries = 2
-        elif isinstance(max_retries, str):
+        if max_retries is not None and isinstance(max_retries, str):
             max_retries = int(max_retries)  # cast to int
         super().__init__(max_retries=max_retries, **args, **params)
 
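With this change `max_retries` is no longer silently defaulted to 2 inside `LiteLLM_Params`; `None` is preserved and only string values (e.g. read from environment-style configs) are cast to `int`. A hedged sketch of the resulting behavior (the `litellm.types.router` import path is an assumption based on the class shown above):

```python
# Illustrative only: None is passed through, string values are cast to int.
from litellm.types.router import LiteLLM_Params  # assumed import path

default_params = LiteLLM_Params(model="gpt-3.5-turbo")
env_params = LiteLLM_Params(model="gpt-3.5-turbo", max_retries="3")

print(default_params.max_retries)  # None (previously defaulted to 2)
print(env_params.max_retries)      # 3
```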
@@ -1212,7 +1212,6 @@ class Logging:
                 print_verbose(
                     f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
                 )
-
             # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
             callbacks = litellm.input_callback + self.dynamic_input_callbacks
             for callback in callbacks:
@@ -1229,29 +1228,17 @@ class Logging:
                             litellm_call_id=self.litellm_params["litellm_call_id"],
                             print_verbose=print_verbose,
                         )
 
-                    elif callback == "lite_debugger":
-                        print_verbose(
-                            f"reaches litedebugger for logging! - model_call_details {self.model_call_details}"
-                        )
-                        model = self.model_call_details["model"]
-                        messages = self.model_call_details["input"]
-                        print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
-                        liteDebuggerClient.input_log_event(
-                            model=model,
-                            messages=messages,
-                            end_user=self.model_call_details.get("user", "default"),
-                            litellm_call_id=self.litellm_params["litellm_call_id"],
-                            litellm_params=self.model_call_details["litellm_params"],
-                            optional_params=self.model_call_details["optional_params"],
-                            print_verbose=print_verbose,
-                            call_type=self.call_type,
-                        )
                     elif callback == "sentry" and add_breadcrumb:
-                        print_verbose("reaches sentry breadcrumbing")
+                        details_to_log = copy.deepcopy(self.model_call_details)
+                        if litellm.turn_off_message_logging:
+                            # make a copy of the _model_Call_details and log it
+                            details_to_log.pop("messages", None)
+                            details_to_log.pop("input", None)
+                            details_to_log.pop("prompt", None)
                         add_breadcrumb(
                             category="litellm.llm_call",
-                            message=f"Model Call Details pre-call: {self.model_call_details}",
+                            message=f"Model Call Details pre-call: {details_to_log}",
                             level="info",
                         )
                     elif isinstance(callback, CustomLogger):  # custom logger class
@@ -1315,7 +1302,7 @@ class Logging:
                 print_verbose(
                     f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
                 )
-
+        self.redact_message_input_output_from_logging(result=original_response)
         # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
 
         callbacks = litellm.input_callback + self.dynamic_input_callbacks
@@ -1333,9 +1320,17 @@ class Logging:
                     )
                 elif callback == "sentry" and add_breadcrumb:
                     print_verbose("reaches sentry breadcrumbing")
+
+                    details_to_log = copy.deepcopy(self.model_call_details)
+                    if litellm.turn_off_message_logging:
+                        # make a copy of the _model_Call_details and log it
+                        details_to_log.pop("messages", None)
+                        details_to_log.pop("input", None)
+                        details_to_log.pop("prompt", None)
+
                     add_breadcrumb(
                         category="litellm.llm_call",
-                        message=f"Model Call Details post-call: {self.model_call_details}",
+                        message=f"Model Call Details post-call: {details_to_log}",
                         level="info",
                     )
                 elif isinstance(callback, CustomLogger):  # custom logger class
@@ -1527,6 +1522,8 @@ class Logging:
         else:
             callbacks = litellm.success_callback
 
+        self.redact_message_input_output_from_logging(result=result)
+
         for callback in callbacks:
             try:
                 litellm_params = self.model_call_details.get("litellm_params", {})
@@ -2071,6 +2068,9 @@ class Logging:
                 callbacks.append(callback)
         else:
             callbacks = litellm._async_success_callback
+
+        self.redact_message_input_output_from_logging(result=result)
+
         print_verbose(f"Async success callbacks: {callbacks}")
         for callback in callbacks:
             # check if callback can run for this request
@@ -2232,7 +2232,10 @@ class Logging:
                         start_time=start_time,
                         end_time=end_time,
                     )
+
             result = None  # result sent to all loggers, init this to None incase it's not created
+
+            self.redact_message_input_output_from_logging(result=result)
             for callback in litellm.failure_callback:
                 try:
                     if callback == "lite_debugger":
@@ -2417,6 +2420,33 @@ class Logging:
                 f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
             )
 
+    def redact_message_input_output_from_logging(self, result):
+        """
+        Removes messages, prompts, input, response from logging. This modifies the data in-place
+        only redacts when litellm.turn_off_message_logging == True
+        """
+        # check if user opted out of logging message/response to callbacks
+        if litellm.turn_off_message_logging == True:
+            # remove messages, prompts, input, response from logging
+            self.model_call_details["messages"] = "redacted-by-litellm"
+            self.model_call_details["prompt"] = ""
+            self.model_call_details["input"] = ""
+
+            # response cleaning
+            # ChatCompletion Responses
+            if self.stream and "complete_streaming_response" in self.model_call_details:
+                _streaming_response = self.model_call_details[
+                    "complete_streaming_response"
+                ]
+                for choice in _streaming_response.choices:
+                    choice.message.content = "redacted-by-litellm"
+            else:
+                if result is not None:
+                    if isinstance(result, litellm.ModelResponse):
+                        if hasattr(result, "choices"):
+                            for choice in result.choices:
+                                choice.message.content = "redacted-by-litellm"
+
 
 def exception_logging(
     additional_args={},
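The new `redact_message_input_output_from_logging` hook is what makes `litellm.turn_off_message_logging` take effect across success, async-success, and failure logging. A hedged usage sketch — the `RedactionCheck` logger below is illustrative, not part of this diff, and assumes the documented `CustomLogger` interface plus a valid `OPENAI_API_KEY`:

```python
# Illustrative sketch: with turn_off_message_logging=True, callbacks see the
# "redacted-by-litellm" placeholder instead of the real messages/response content.
import litellm
from litellm.integrations.custom_logger import CustomLogger


class RedactionCheck(CustomLogger):
    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        print("messages seen by logger:", kwargs.get("messages"))
        print("content seen by logger:", response_obj.choices[0].message.content)


litellm.turn_off_message_logging = True
litellm.callbacks = [RedactionCheck()]

litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)
```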
@@ -2598,9 +2628,15 @@ def function_setup(
             dynamic_success_callbacks = kwargs.pop("success_callback")
 
         if add_breadcrumb:
+            details_to_log = copy.deepcopy(kwargs)
+            if litellm.turn_off_message_logging:
+                # make a copy of the _model_Call_details and log it
+                details_to_log.pop("messages", None)
+                details_to_log.pop("input", None)
+                details_to_log.pop("prompt", None)
             add_breadcrumb(
                 category="litellm.llm_call",
-                message=f"Positional Args: {args}, Keyword Args: {kwargs}",
+                message=f"Positional Args: {args}, Keyword Args: {details_to_log}",
                 level="info",
             )
         if "logger_fn" in kwargs:
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.35.30"
+version = "1.35.31"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.35.30"
+version = "1.35.31"
 version_files = [
     "pyproject.toml:^version"
 ]
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[27125,[\"447\",\"static/chunks/447-9f8d32190ff7d16d.js\",\"931\",\"static/chunks/app/page-781ca5f151d78d1d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"PtTtxXIYvdjQsvRgdITlk\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-ccae12a25017afa5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[27125,[\"447\",\"static/chunks/447-9f8d32190ff7d16d.js\",\"931\",\"static/chunks/app/page-508c39694bd40fe9.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5e699db73bf6f8c2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"kbGdRQFfI6W3bEwfzmJDI\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
@@ -1,7 +1,7 @@
 2:I[77831,[],""]
-3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-781ca5f151d78d1d.js"],""]
+3:I[27125,["447","static/chunks/447-9f8d32190ff7d16d.js","931","static/chunks/app/page-508c39694bd40fe9.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
0:["PtTtxXIYvdjQsvRgdITlk",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["kbGdRQFfI6W3bEwfzmJDI",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5e699db73bf6f8c2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@@ -39,6 +39,7 @@ const CreateKey: React.FC<CreateKeyProps> = ({
   const [apiKey, setApiKey] = useState(null);
   const [softBudget, setSoftBudget] = useState(null);
   const [userModels, setUserModels] = useState([]);
+  const [modelsToPick, setModelsToPick] = useState([]);
   const handleOk = () => {
     setIsModalVisible(false);
     form.resetFields();
@@ -95,6 +96,30 @@ const CreateKey: React.FC<CreateKeyProps> = ({
     message.success('API Key copied to clipboard');
   };
 
+  useEffect(() => {
+    let tempModelsToPick = [];
+
+    if (team) {
+      if (team.models.length > 0) {
+        if (team.models.includes("all-proxy-models")) {
+          // if the team has all-proxy-models show all available models
+          tempModelsToPick = userModels;
+        } else {
+          // show team models
+          tempModelsToPick = team.models;
+        }
+      } else {
+        // show all available models if the team has no models set
+        tempModelsToPick = userModels;
+      }
+    } else {
+      // no team set, show all available models
+      tempModelsToPick = userModels;
+    }
+
+    setModelsToPick(tempModelsToPick);
+  }, [team, userModels]);
+
 
   return (
     <div>
@@ -161,30 +186,15 @@ const CreateKey: React.FC<CreateKeyProps> = ({
                     <Option key="all-team-models" value="all-team-models">
                       All Team Models
                     </Option>
-                    {team && team.models ? (
-                      team.models.includes("all-proxy-models") ? (
-                        userModels.map((model: string) => (
+                    {
+                      modelsToPick.map((model: string) => (
                         (
                           <Option key={model} value={model}>
                             {model}
                           </Option>
                         )
                       ))
-                      ) : (
-                        team.models.map((model: string) => (
-                          <Option key={model} value={model}>
-                            {model}
-                          </Option>
-                        ))
-                      )
-                    ) : (
-                      userModels.map((model: string) => (
-                        <Option key={model} value={model}>
-                          {model}
-                        </Option>
-                      ))
-                    )}
-
+                    }
                   </Select>
                 </Form.Item>
                 <Accordion className="mt-20 mb-8" >