forked from phoenix/litellm-mirror

Merge branch 'main' into litellm_azure_content_filter_fallbacks

commit 0454c0781a

51 changed files with 1650 additions and 1074 deletions
@@ -152,3 +152,104 @@ litellm_remaining_team_budget_metric{team_alias="QA Prod Bot",team_id="de35b29e-
```
### Dynamic TPM Allocation
Prevent projects from gobbling too much quota.
Dynamically allocate TPM quota to api keys, based on active keys in that minute.
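The allocation is easiest to see with numbers - an illustrative sketch only, using the values from the test below (the real allocation lives in the proxy's `dynamic_rate_limiter` hook):

```python
# Illustrative only - not the proxy's actual implementation.
model_tpm = 60               # TPM configured for the model in config.yaml
active_keys_this_minute = 2  # keys that called this model in the current minute

tpm_per_key = model_tpm // active_keys_this_minute
print(tpm_per_key)  # 30 -> with 30-token mock responses, each key gets ~1 request/minute
```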
1. Setup config.yaml
```yaml
model_list:
  - model_name: my-fake-model
    litellm_params:
      model: gpt-3.5-turbo
      api_key: my-fake-key
      mock_response: hello-world
      tpm: 60

litellm_settings:
  callbacks: ["dynamic_rate_limiter"]

general_settings:
  master_key: sk-1234 # OR set `LITELLM_MASTER_KEY=".."` in your .env
  database_url: postgres://.. # OR set `DATABASE_URL=".."` in your .env
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
```python
"""
- Run 2 concurrent teams calling same model
- model has 60 TPM
- Mock response returns 30 total tokens / request
- Each team will only be able to make 1 request per minute
"""
import requests
from openai import OpenAI, RateLimitError


def create_key(api_key: str, base_url: str):
    response = requests.post(
        url="{}/key/generate".format(base_url),
        json={},
        headers={"Authorization": "Bearer {}".format(api_key)},
    )

    _response = response.json()

    return _response["key"]


key_1 = create_key(api_key="sk-1234", base_url="http://0.0.0.0:4000")
key_2 = create_key(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# call proxy with key 1 - works
openai_client_1 = OpenAI(api_key=key_1, base_url="http://0.0.0.0:4000")

response = openai_client_1.chat.completions.with_raw_response.create(
    model="my-fake-model", messages=[{"role": "user", "content": "Hello world!"}],
)

print("Headers for call 1 - {}".format(response.headers))
_response = response.parse()
print("Total tokens for call - {}".format(_response.usage.total_tokens))


# call proxy with key 2 - works
openai_client_2 = OpenAI(api_key=key_2, base_url="http://0.0.0.0:4000")

response = openai_client_2.chat.completions.with_raw_response.create(
    model="my-fake-model", messages=[{"role": "user", "content": "Hello world!"}],
)

print("Headers for call 2 - {}".format(response.headers))
_response = response.parse()
print("Total tokens for call - {}".format(_response.usage.total_tokens))

# call proxy with key 2 - fails
try:
    openai_client_2.chat.completions.with_raw_response.create(model="my-fake-model", messages=[{"role": "user", "content": "Hey, how's it going?"}])
    raise Exception("This should have failed!")
except RateLimitError as e:
    print("This was rate limited b/c - {}".format(str(e)))
```
**Expected Response**
```
This was rate limited b/c - Error code: 429 - {'error': {'message': {'error': 'Key=<hashed_token> over available TPM=0. Model TPM=0, Active keys=2'}, 'type': 'None', 'param': 'None', 'code': 429}}
```
@@ -37,7 +37,9 @@ input_callback: List[Union[str, Callable]] = []
 success_callback: List[Union[str, Callable]] = []
 failure_callback: List[Union[str, Callable]] = []
 service_callback: List[Union[str, Callable]] = []
-_custom_logger_compatible_callbacks_literal = Literal["lago", "openmeter", "logfire"]
+_custom_logger_compatible_callbacks_literal = Literal[
+    "lago", "openmeter", "logfire", "dynamic_rate_limiter"
+]
 callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] = []
 _langfuse_default_tags: Optional[
     List[
@@ -735,6 +737,7 @@ from .utils import (
     client,
     exception_type,
     get_optional_params,
+    get_response_string,
     modify_integration,
     token_counter,
     create_pretrained_tokenizer,
@@ -1,11 +1,12 @@
-from datetime import datetime
+from datetime import datetime, timedelta
+from typing import TYPE_CHECKING, Any, Optional, Union
+
 import litellm
 from litellm.proxy._types import UserAPIKeyAuth
-from .types.services import ServiceTypes, ServiceLoggerPayload
-from .integrations.prometheus_services import PrometheusServicesLogger
+
 from .integrations.custom_logger import CustomLogger
-from datetime import timedelta
-from typing import Union, Optional, TYPE_CHECKING, Any
+from .integrations.prometheus_services import PrometheusServicesLogger
+from .types.services import ServiceLoggerPayload, ServiceTypes
 
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
@@ -53,8 +54,8 @@ class ServiceLogging(CustomLogger):
         call_type: str,
         duration: float,
         parent_otel_span: Optional[Span] = None,
-        start_time: Optional[datetime] = None,
-        end_time: Optional[datetime] = None,
+        start_time: Optional[Union[datetime, float]] = None,
+        end_time: Optional[Union[datetime, float]] = None,
     ):
         """
         - For counting if the redis, postgres call is successful
@@ -92,8 +93,8 @@ class ServiceLogging(CustomLogger):
         error: Union[str, Exception],
         call_type: str,
         parent_otel_span: Optional[Span] = None,
-        start_time: Optional[datetime] = None,
-        end_time: Optional[datetime] = None,
+        start_time: Optional[Union[datetime, float]] = None,
+        end_time: Optional[Union[float, datetime]] = None,
     ):
         """
         - For counting if the redis, postgres call is unsuccessful
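The `Union[datetime, float]` widening above matters because some callers (for example the Redis cache paths in this PR) pass `time.time()` floats rather than datetimes. A minimal, hypothetical normalization helper - not part of this diff - shows what handling both forms implies:

```python
from datetime import datetime
from typing import Optional, Union


def _to_datetime(value: Optional[Union[datetime, float]]) -> Optional[datetime]:
    """Normalize a time.time() float or a datetime to a datetime (hypothetical helper)."""
    if value is None or isinstance(value, datetime):
        return value
    return datetime.fromtimestamp(value)
```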
@@ -7,14 +7,21 @@
 #
 # Thank you users! We ❤️ you! - Krrish & Ishaan
 
-import litellm
-import time, logging, asyncio
-import json, traceback, ast, hashlib
-from typing import Optional, Literal, List, Union, Any, BinaryIO
+import ast
+import asyncio
+import hashlib
+import json
+import logging
+import time
+import traceback
+from datetime import timedelta
+from typing import Any, BinaryIO, List, Literal, Optional, Union
 
 from openai._models import BaseModel as OpenAIObject
 
+import litellm
 from litellm._logging import verbose_logger
 from litellm.types.services import ServiceLoggerPayload, ServiceTypes
-import traceback
 
 
 def print_verbose(print_statement):
@@ -78,6 +85,17 @@ class InMemoryCache(BaseCache):
         else:
             self.set_cache(key=cache_key, value=cache_value)
 
+    async def async_set_cache_sadd(self, key, value: List, ttl: Optional[float]):
+        """
+        Add value to set
+        """
+        # get the value
+        init_value = self.get_cache(key=key) or set()
+        for val in value:
+            init_value.add(val)
+        self.set_cache(key, init_value, ttl=ttl)
+        return value
+
     def get_cache(self, key, **kwargs):
         if key in self.cache_dict:
             if key in self.ttl_dict:
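A short usage sketch for the new in-memory set-add helper (constructor defaults assumed; the `active_keys:*` key name is made up for illustration):

```python
import asyncio

from litellm.caching import InMemoryCache


async def main():
    cache = InMemoryCache()
    # repeated adds de-duplicate because the value is stored as a Python set
    await cache.async_set_cache_sadd(key="active_keys:my-fake-model", value=["key_1"], ttl=60)
    await cache.async_set_cache_sadd(key="active_keys:my-fake-model", value=["key_2", "key_1"], ttl=60)
    print(cache.get_cache("active_keys:my-fake-model"))  # {'key_1', 'key_2'}


asyncio.run(main())
```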
@@ -147,10 +165,12 @@ class RedisCache(BaseCache):
         namespace: Optional[str] = None,
         **kwargs,
     ):
-        from ._redis import get_redis_client, get_redis_connection_pool
-        from litellm._service_logger import ServiceLogging
         import redis
 
+        from litellm._service_logger import ServiceLogging
+
+        from ._redis import get_redis_client, get_redis_connection_pool
+
         redis_kwargs = {}
         if host is not None:
             redis_kwargs["host"] = host
@@ -329,6 +349,7 @@ class RedisCache(BaseCache):
                     start_time=start_time,
                     end_time=end_time,
                     parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
+                    call_type="async_set_cache",
                 )
             )
             # NON blocking - notify users Redis is throwing an exception
@@ -448,6 +469,80 @@ class RedisCache(BaseCache):
                 cache_value,
             )
 
+    async def async_set_cache_sadd(
+        self, key, value: List, ttl: Optional[float], **kwargs
+    ):
+        start_time = time.time()
+        try:
+            _redis_client = self.init_async_client()
+        except Exception as e:
+            end_time = time.time()
+            _duration = end_time - start_time
+            asyncio.create_task(
+                self.service_logger_obj.async_service_failure_hook(
+                    service=ServiceTypes.REDIS,
+                    duration=_duration,
+                    error=e,
+                    start_time=start_time,
+                    end_time=end_time,
+                    parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
+                    call_type="async_set_cache_sadd",
+                )
+            )
+            # NON blocking - notify users Redis is throwing an exception
+            verbose_logger.error(
+                "LiteLLM Redis Caching: async set() - Got exception from REDIS %s, Writing value=%s",
+                str(e),
+                value,
+            )
+            raise e
+
+        key = self.check_and_fix_namespace(key=key)
+        async with _redis_client as redis_client:
+            print_verbose(
+                f"Set ASYNC Redis Cache: key: {key}\nValue {value}\nttl={ttl}"
+            )
+            try:
+                await redis_client.sadd(key, *value)
+                if ttl is not None:
+                    _td = timedelta(seconds=ttl)
+                    await redis_client.expire(key, _td)
+                print_verbose(
+                    f"Successfully Set ASYNC Redis Cache SADD: key: {key}\nValue {value}\nttl={ttl}"
+                )
+                end_time = time.time()
+                _duration = end_time - start_time
+                asyncio.create_task(
+                    self.service_logger_obj.async_service_success_hook(
+                        service=ServiceTypes.REDIS,
+                        duration=_duration,
+                        call_type="async_set_cache_sadd",
+                        start_time=start_time,
+                        end_time=end_time,
+                        parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
+                    )
+                )
+            except Exception as e:
+                end_time = time.time()
+                _duration = end_time - start_time
+                asyncio.create_task(
+                    self.service_logger_obj.async_service_failure_hook(
+                        service=ServiceTypes.REDIS,
+                        duration=_duration,
+                        error=e,
+                        call_type="async_set_cache_sadd",
+                        start_time=start_time,
+                        end_time=end_time,
+                        parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
+                    )
+                )
+                # NON blocking - notify users Redis is throwing an exception
+                verbose_logger.error(
+                    "LiteLLM Redis Caching: async set_cache_sadd() - Got exception from REDIS %s, Writing value=%s",
+                    str(e),
+                    value,
+                )
+
     async def batch_cache_write(self, key, value, **kwargs):
         print_verbose(
             f"in batch cache writing for redis buffer size={len(self.redis_batch_writing_buffer)}",
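The Redis-side helper above is built on plain `SADD` plus `EXPIRE`. A standalone redis-py sketch of that pattern (assumes a local Redis; `aclose()` needs redis-py >= 5, use `close()` on 4.x):

```python
import asyncio

import redis.asyncio as redis


async def main():
    client = redis.Redis()  # localhost:6379 by default
    # SADD de-duplicates members; EXPIRE sets a TTL on the whole set key
    await client.sadd("active_keys:my-fake-model", "key_1", "key_2")
    await client.expire("active_keys:my-fake-model", 60)
    print(await client.smembers("active_keys:my-fake-model"))
    await client.aclose()


asyncio.run(main())
```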
@@ -886,11 +981,10 @@ class RedisSemanticCache(BaseCache):
 
     def get_cache(self, key, **kwargs):
         print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}")
-        from redisvl.query import VectorQuery
         import numpy as np
+        from redisvl.query import VectorQuery
 
         # query
 
         # get the messages
         messages = kwargs["messages"]
         prompt = "".join(message["content"] for message in messages)
@@ -943,7 +1037,8 @@ class RedisSemanticCache(BaseCache):
 
     async def async_set_cache(self, key, value, **kwargs):
         import numpy as np
-        from litellm.proxy.proxy_server import llm_router, llm_model_list
+
+        from litellm.proxy.proxy_server import llm_model_list, llm_router
 
         try:
             await self.index.acreate(overwrite=False)  # don't overwrite existing index
@@ -998,12 +1093,12 @@ class RedisSemanticCache(BaseCache):
 
     async def async_get_cache(self, key, **kwargs):
         print_verbose(f"async redis semantic-cache get_cache, kwargs: {kwargs}")
-        from redisvl.query import VectorQuery
         import numpy as np
-        from litellm.proxy.proxy_server import llm_router, llm_model_list
+        from redisvl.query import VectorQuery
+
+        from litellm.proxy.proxy_server import llm_model_list, llm_router
 
         # query
 
         # get the messages
         messages = kwargs["messages"]
         prompt = "".join(message["content"] for message in messages)
@@ -1161,7 +1256,8 @@ class S3Cache(BaseCache):
         self.set_cache(key=key, value=value, **kwargs)
 
     def get_cache(self, key, **kwargs):
-        import boto3, botocore
+        import boto3
+        import botocore
 
         try:
             key = self.key_prefix + key
@@ -1471,7 +1567,7 @@ class DualCache(BaseCache):
                     key, value, **kwargs
                 )
 
-            if self.redis_cache is not None and local_only == False:
+            if self.redis_cache is not None and local_only is False:
                 result = await self.redis_cache.async_increment(key, value, **kwargs)
 
             return result
@@ -1480,6 +1576,38 @@ class DualCache(BaseCache):
             verbose_logger.debug(traceback.format_exc())
             raise e
 
+    async def async_set_cache_sadd(
+        self, key, value: List, local_only: bool = False, **kwargs
+    ) -> None:
+        """
+        Add value to a set
+
+        Key - the key in cache
+
+        Value - str - the value you want to add to the set
+
+        Returns - None
+        """
+        try:
+            if self.in_memory_cache is not None:
+                _ = await self.in_memory_cache.async_set_cache_sadd(
+                    key, value, ttl=kwargs.get("ttl", None)
+                )
+
+            if self.redis_cache is not None and local_only is False:
+                _ = await self.redis_cache.async_set_cache_sadd(
+                    key, value, ttl=kwargs.get("ttl", None), **kwargs
+                )
+
+            return None
+        except Exception as e:
+            verbose_logger.error(
+                "LiteLLM Cache: Excepton async set_cache_sadd: {}\n{}".format(
+                    str(e), traceback.format_exc()
+                )
+            )
+            raise e
+
     def flush_cache(self):
         if self.in_memory_cache is not None:
             self.in_memory_cache.flush_cache()
@@ -105,8 +105,8 @@ class OpenTelemetry(CustomLogger):
         self,
         payload: ServiceLoggerPayload,
         parent_otel_span: Optional[Span] = None,
-        start_time: Optional[datetime] = None,
-        end_time: Optional[datetime] = None,
+        start_time: Optional[Union[datetime, float]] = None,
+        end_time: Optional[Union[datetime, float]] = None,
     ):
         from datetime import datetime
 
@@ -144,8 +144,8 @@ class OpenTelemetry(CustomLogger):
         self,
         payload: ServiceLoggerPayload,
         parent_otel_span: Optional[Span] = None,
-        start_time: Optional[datetime] = None,
-        end_time: Optional[datetime] = None,
+        start_time: Optional[Union[datetime, float]] = None,
+        end_time: Optional[Union[float, datetime]] = None,
     ):
         from datetime import datetime
 
@@ -19,7 +19,8 @@ from litellm import (
     turn_off_message_logging,
     verbose_logger,
 )
-from litellm.caching import InMemoryCache, S3Cache
+
+from litellm.caching import InMemoryCache, S3Cache, DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.redact_messages import (
     redact_message_input_output_from_logging,
@@ -1899,7 +1900,11 @@ def set_callbacks(callback_list, function_id=None):
 
 def _init_custom_logger_compatible_class(
     logging_integration: litellm._custom_logger_compatible_callbacks_literal,
-) -> Callable:
+    internal_usage_cache: Optional[DualCache],
+    llm_router: Optional[
+        Any
+    ],  # expect litellm.Router, but typing errors due to circular import
+) -> CustomLogger:
     if logging_integration == "lago":
         for callback in _in_memory_loggers:
             if isinstance(callback, LagoLogger):
@@ -1935,3 +1940,58 @@ def _init_custom_logger_compatible_class(
         _otel_logger = OpenTelemetry(config=otel_config)
         _in_memory_loggers.append(_otel_logger)
         return _otel_logger  # type: ignore
+    elif logging_integration == "dynamic_rate_limiter":
+        from litellm.proxy.hooks.dynamic_rate_limiter import (
+            _PROXY_DynamicRateLimitHandler,
+        )
+
+        for callback in _in_memory_loggers:
+            if isinstance(callback, _PROXY_DynamicRateLimitHandler):
+                return callback  # type: ignore
+
+        if internal_usage_cache is None:
+            raise Exception(
+                "Internal Error: Cache cannot be empty - internal_usage_cache={}".format(
+                    internal_usage_cache
+                )
+            )
+
+        dynamic_rate_limiter_obj = _PROXY_DynamicRateLimitHandler(
+            internal_usage_cache=internal_usage_cache
+        )
+
+        if llm_router is not None and isinstance(llm_router, litellm.Router):
+            dynamic_rate_limiter_obj.update_variables(llm_router=llm_router)
+        _in_memory_loggers.append(dynamic_rate_limiter_obj)
+        return dynamic_rate_limiter_obj  # type: ignore
+
+
+def get_custom_logger_compatible_class(
+    logging_integration: litellm._custom_logger_compatible_callbacks_literal,
+) -> Optional[CustomLogger]:
+    if logging_integration == "lago":
+        for callback in _in_memory_loggers:
+            if isinstance(callback, LagoLogger):
+                return callback
+    elif logging_integration == "openmeter":
+        for callback in _in_memory_loggers:
+            if isinstance(callback, OpenMeterLogger):
+                return callback
+    elif logging_integration == "logfire":
+        if "LOGFIRE_TOKEN" not in os.environ:
+            raise ValueError("LOGFIRE_TOKEN not found in environment variables")
+        from litellm.integrations.opentelemetry import OpenTelemetry
+
+        for callback in _in_memory_loggers:
+            if isinstance(callback, OpenTelemetry):
+                return callback  # type: ignore
+
+    elif logging_integration == "dynamic_rate_limiter":
+        from litellm.proxy.hooks.dynamic_rate_limiter import (
+            _PROXY_DynamicRateLimitHandler,
+        )
+
+        for callback in _in_memory_loggers:
+            if isinstance(callback, _PROXY_DynamicRateLimitHandler):
+                return callback  # type: ignore
+    return None
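A sketch of how the updated initializer might be invoked (the import path and `Router` setup are assumptions for illustration, not taken from this diff):

```python
from litellm import Router
from litellm.caching import DualCache
from litellm.litellm_core_utils.litellm_logging import _init_custom_logger_compatible_class

internal_usage_cache = DualCache()
llm_router = Router(
    model_list=[
        {
            "model_name": "my-fake-model",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "api_key": "my-fake-key",
                "mock_response": "hello-world",
                "tpm": 60,
            },
        }
    ]
)

# Returns (and memoizes in _in_memory_loggers) a _PROXY_DynamicRateLimitHandler
# bound to the shared usage cache and router.
rate_limiter = _init_custom_logger_compatible_class(
    logging_integration="dynamic_rate_limiter",
    internal_usage_cache=internal_usage_cache,
    llm_router=llm_router,
)
```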
@@ -1,63 +1,64 @@
 # What is this?
 ## Initial implementation of calling bedrock via httpx client (allows for async calls).
 ## V1 - covers cohere + anthropic claude-3 support
-from functools import partial
-import os, types
+import copy
 import json
-from enum import Enum
-import requests, copy  # type: ignore
+import os
 import time
+import types
+import urllib.parse
+import uuid
+from enum import Enum
+from functools import partial
 from typing import (
+    Any,
+    AsyncIterator,
     Callable,
-    Optional,
+    Iterator,
     List,
     Literal,
-    Union,
-    Any,
-    TypedDict,
+    Optional,
     Tuple,
-    Iterator,
-    AsyncIterator,
+    TypedDict,
+    Union,
 )
-from litellm.utils import (
-    ModelResponse,
-    Usage,
-    CustomStreamWrapper,
-    get_secret,
-)
+
+import httpx  # type: ignore
+import requests  # type: ignore
+
+import litellm
+from litellm.caching import DualCache
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.litellm_core_utils.litellm_logging import Logging
-from litellm.types.utils import Message, Choices
-import litellm, uuid
-from .prompt_templates.factory import (
-    prompt_factory,
-    custom_prompt,
-    cohere_message_pt,
-    construct_tool_use_system_prompt,
-    extract_between_tags,
-    parse_xml_params,
-    contains_tag,
-    _bedrock_converse_messages_pt,
-    _bedrock_tools_pt,
-)
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
     _get_async_httpx_client,
     _get_httpx_client,
 )
-from .base import BaseLLM
-import httpx  # type: ignore
-from .bedrock import BedrockError, convert_messages_to_prompt, ModelResponseIterator
 from litellm.types.llms.bedrock import *
-import urllib.parse
 from litellm.types.llms.openai import (
+    ChatCompletionDeltaChunk,
     ChatCompletionResponseMessage,
     ChatCompletionToolCallChunk,
     ChatCompletionToolCallFunctionChunk,
-    ChatCompletionDeltaChunk,
 )
-from litellm.caching import DualCache
+from litellm.types.utils import Choices, Message
+from litellm.utils import CustomStreamWrapper, ModelResponse, Usage, get_secret
+
+from .base import BaseLLM
+from .bedrock import BedrockError, ModelResponseIterator, convert_messages_to_prompt
+from .prompt_templates.factory import (
+    _bedrock_converse_messages_pt,
+    _bedrock_tools_pt,
+    cohere_message_pt,
+    construct_tool_use_system_prompt,
+    contains_tag,
+    custom_prompt,
+    extract_between_tags,
+    parse_xml_params,
+    prompt_factory,
+)
 
 iam_cache = DualCache()
@@ -171,26 +172,34 @@ async def make_call(
     messages: list,
     logging_obj,
 ):
-    if client is None:
-        client = _get_async_httpx_client()  # Create a new client if none provided
+    try:
+        if client is None:
+            client = _get_async_httpx_client()  # Create a new client if none provided
 
         response = await client.post(api_base, headers=headers, data=data, stream=True)
 
         if response.status_code != 200:
             raise BedrockError(status_code=response.status_code, message=response.text)
 
         decoder = AWSEventStreamDecoder(model=model)
         completion_stream = decoder.aiter_bytes(response.aiter_bytes(chunk_size=1024))
 
         # LOGGING
         logging_obj.post_call(
             input=messages,
             api_key="",
             original_response="first stream response received",
             additional_args={"complete_input_dict": data},
         )
 
         return completion_stream
+    except httpx.HTTPStatusError as err:
+        error_code = err.response.status_code
+        raise BedrockError(status_code=error_code, message=str(err))
+    except httpx.TimeoutException as e:
+        raise BedrockError(status_code=408, message="Timeout error occurred.")
+    except Exception as e:
+        raise BedrockError(status_code=500, message=str(e))
 
 
 def make_sync_call(
@@ -704,7 +713,6 @@ class BedrockLLM(BaseLLM):
     ) -> Union[ModelResponse, CustomStreamWrapper]:
         try:
             import boto3
-
             from botocore.auth import SigV4Auth
             from botocore.awsrequest import AWSRequest
             from botocore.credentials import Credentials
@@ -1650,7 +1658,6 @@ class BedrockConverseLLM(BaseLLM):
     ):
         try:
             import boto3
-
             from botocore.auth import SigV4Auth
             from botocore.awsrequest import AWSRequest
             from botocore.credentials import Credentials
@@ -1904,8 +1911,8 @@ class BedrockConverseLLM(BaseLLM):
 
 
 def get_response_stream_shape():
-    from botocore.model import ServiceModel
     from botocore.loaders import Loader
+    from botocore.model import ServiceModel
 
     loader = Loader()
     bedrock_service_dict = loader.load_service_model("bedrock-runtime", "service-2")
@@ -1218,6 +1218,7 @@ class ModelResponseIterator:
     def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
         try:
             processed_chunk = GenerateContentResponseBody(**chunk)  # type: ignore
 
             text = ""
             tool_use: Optional[ChatCompletionToolCallChunk] = None
             is_finished = False
@@ -1236,7 +1237,8 @@ class ModelResponseIterator:
                 finish_reason = map_finish_reason(
                     finish_reason=gemini_chunk["finishReason"]
                 )
-                is_finished = True
+                ## DO NOT SET 'finish_reason' = True
+                ## GEMINI SETS FINISHREASON ON EVERY CHUNK!
 
             if "usageMetadata" in processed_chunk:
                 usage = ChatCompletionUsageBlock(
@@ -1250,7 +1252,7 @@ class ModelResponseIterator:
             returned_chunk = GenericStreamingChunk(
                 text=text,
                 tool_use=tool_use,
-                is_finished=is_finished,
+                is_finished=False,
                 finish_reason=finish_reason,
                 usage=usage,
                 index=0,
@@ -1268,9 +1270,8 @@ class ModelResponseIterator:
             chunk = self.response_iterator.__next__()
             self.coro.send(chunk)
             if self.events:
-                event = self.events[0]
+                event = self.events.pop(0)
                 json_chunk = event
-                self.events.clear()
                 return self.chunk_parser(chunk=json_chunk)
             return GenericStreamingChunk(
                 text="",
@@ -1281,6 +1282,9 @@ class ModelResponseIterator:
                 tool_use=None,
             )
         except StopIteration:
+            if self.events:  # flush the events
+                event = self.events.pop(0)  # Remove the first event
+                return self.chunk_parser(chunk=event)
             raise StopIteration
         except ValueError as e:
             raise RuntimeError(f"Error parsing chunk: {e}")
@@ -1295,9 +1299,8 @@ class ModelResponseIterator:
             chunk = await self.async_response_iterator.__anext__()
             self.coro.send(chunk)
             if self.events:
-                event = self.events[0]
+                event = self.events.pop(0)
                 json_chunk = event
-                self.events.clear()
                 return self.chunk_parser(chunk=json_chunk)
             return GenericStreamingChunk(
                 text="",
@@ -1308,6 +1311,9 @@ class ModelResponseIterator:
                 tool_use=None,
             )
         except StopAsyncIteration:
+            if self.events:  # flush the events
+                event = self.events.pop(0)  # Remove the first event
+                return self.chunk_parser(chunk=event)
             raise StopAsyncIteration
         except ValueError as e:
             raise RuntimeError(f"Error parsing chunk: {e}")
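Both the `StopIteration` and `StopAsyncIteration` changes above follow the same buffering idea. A stripped-down sketch of that pattern with simplified names (not the actual class):

```python
class BufferedChunkIterator:
    """Queue parsed events and flush any leftover event once the source is exhausted."""

    def __init__(self, source):
        self.source = iter(source)
        self.events = []

    def __iter__(self):
        return self

    def __next__(self):
        try:
            self.events.append(next(self.source))  # stand-in for self.coro.send(chunk)
            return self.events.pop(0)              # hand back the oldest buffered event
        except StopIteration:
            if self.events:                        # flush what is left before stopping
                return self.events.pop(0)
            raise
```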
@@ -428,7 +428,7 @@ def mock_completion(
     model: str,
     messages: List,
     stream: Optional[bool] = False,
-    mock_response: Union[str, Exception] = "This is a mock request",
+    mock_response: Union[str, Exception, dict] = "This is a mock request",
     mock_tool_calls: Optional[List] = None,
     logging=None,
     custom_llm_provider=None,
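`mock_response` is the same parameter the proxy config in the docs above relies on; a quick client-side sketch of the string form (the `dict` form is what this change newly allows):

```python
import litellm

# No network call is made - the mock response is returned directly.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello world!"}],
    mock_response="hello-world",
)
print(response.choices[0].message.content)  # "hello-world"
```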
@@ -1 +0,0 @@
-(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push(... (generated Next.js Admin UI chunk; minified line suppressed because it is too long)
@@ -0,0 +1 @@
+(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push(... (generated Next.js Admin UI chunk; minified line suppressed because it is too long)
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
-<!DOCTYPE html>... (generated Admin UI page with buildId "dDWf4yi4zCe685SxgCnWX" and page chunk page-5b9334558218205d.js; minified line suppressed because it is too long)
+<!DOCTYPE html>... (generated Admin UI page with buildId "DahySukItzAH9ZoOiMmQB" and page chunk page-42b04008af7da690.js; minified line suppressed because it is too long)
@@ -1,7 +1,7 @@
 2:I[77831,[],""]
-3:I[48951,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-5b9334558218205d.js"],""]
+3:I[48951,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-42b04008af7da690.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
-0:["dDWf4yi4zCe685SxgCnWX",... (generated RSC payload; line suppressed because it is too long)
+0:["DahySukItzAH9ZoOiMmQB",... (generated RSC payload; line suppressed because it is too long)
 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null
@ -2,6 +2,6 @@
[generated Next.js RSC payload for /ui/model_hub: only the build ID changes, "dDWf4yi4zCe685SxgCnWX" -> "DahySukItzAH9ZoOiMmQB"; chunk references, metadata, and stylesheet links are identical]
@ -1,7 +1,7 @@
[generated Next.js RSC payload for /ui/onboarding: the build ID changes, "dDWf4yi4zCe685SxgCnWX" -> "DahySukItzAH9ZoOiMmQB", and the onboarding page chunk changes from "page-da04a591bae84617.js" to "page-fd30ae439831db99.js"; everything else is identical]
|
@ -1,14 +1,10 @@
 model_list:
 - model_name: my-fake-model
   litellm_params:
-    model: gpt-3.5-turbo
+    model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
     api_key: my-fake-key
-    mock_response: hello-world
-- model_name: gpt-4o
-  litellm_params:
-    model: azure/gpt-4o
-    api_base: https://litellm8397336933.openai.azure.com/
-    api_key: 610f806211ab47f2a694493000045858
+    aws_bedrock_runtime_endpoint: http://127.0.0.1:8000
 
 litellm_settings:
-  content_policy_fallbacks: [{"gpt-4o": ["my-fake-model"]}]
+  success_callback: ["langfuse"]
+  failure_callback: ["langfuse"]
|
@ -30,6 +30,7 @@ model_list:
|
||||||
api_key: os.environ/AZURE_API_KEY
|
api_key: os.environ/AZURE_API_KEY
|
||||||
api_version: 2024-02-15-preview
|
api_version: 2024-02-15-preview
|
||||||
model: azure/chatgpt-v-2
|
model: azure/chatgpt-v-2
|
||||||
|
tpm: 100
|
||||||
model_name: gpt-3.5-turbo
|
model_name: gpt-3.5-turbo
|
||||||
- litellm_params:
|
- litellm_params:
|
||||||
model: anthropic.claude-3-sonnet-20240229-v1:0
|
model: anthropic.claude-3-sonnet-20240229-v1:0
|
||||||
|
@ -40,6 +41,7 @@ model_list:
|
||||||
api_version: 2024-02-15-preview
|
api_version: 2024-02-15-preview
|
||||||
model: azure/chatgpt-v-2
|
model: azure/chatgpt-v-2
|
||||||
drop_params: True
|
drop_params: True
|
||||||
|
tpm: 100
|
||||||
model_name: gpt-3.5-turbo
|
model_name: gpt-3.5-turbo
|
||||||
- model_name: tts
|
- model_name: tts
|
||||||
litellm_params:
|
litellm_params:
|
||||||
|
@ -67,8 +69,7 @@ model_list:
|
||||||
max_input_tokens: 80920
|
max_input_tokens: 80920
|
||||||
|
|
||||||
litellm_settings:
|
litellm_settings:
|
||||||
success_callback: ["langfuse"]
|
callbacks: ["dynamic_rate_limiter"]
|
||||||
failure_callback: ["langfuse"]
|
|
||||||
# default_team_settings:
|
# default_team_settings:
|
||||||
# - team_id: proj1
|
# - team_id: proj1
|
||||||
# success_callback: ["langfuse"]
|
# success_callback: ["langfuse"]
|
||||||
|
|
|
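The proxy config hunks above add a per-deployment `tpm: 100` and switch `litellm_settings` from Langfuse callbacks to `callbacks: ["dynamic_rate_limiter"]`. Later in this commit, the router sums those per-deployment limits into a model-group total. A rough standalone sketch of that aggregation (the helper name and dict shapes are illustrative, not the actual Router API):

```python
from typing import List, Optional

def total_group_tpm(model_list: List[dict], model_group: str) -> Optional[int]:
    """Sum per-deployment TPM limits for one model group, looking in the same
    three places the router checks: top level, litellm_params, then model_info."""
    total: Optional[int] = None
    for deployment in model_list:
        if deployment.get("model_name") != model_group:
            continue
        tpm = (
            deployment.get("tpm")
            or deployment.get("litellm_params", {}).get("tpm")
            or deployment.get("model_info", {}).get("tpm")
        )
        if tpm is not None:
            total = (total or 0) + tpm
    return total

# two gpt-3.5-turbo deployments with tpm: 100 each -> a 200 TPM group budget
print(total_group_tpm(
    [
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"tpm": 100}},
        {"model_name": "gpt-3.5-turbo", "tpm": 100, "litellm_params": {}},
    ],
    "gpt-3.5-turbo",
))  # 200
```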
@ -188,6 +188,9 @@ class LiteLLMRoutes(enum.Enum):
|
||||||
# audio transcription
|
# audio transcription
|
||||||
"/audio/transcriptions",
|
"/audio/transcriptions",
|
||||||
"/v1/audio/transcriptions",
|
"/v1/audio/transcriptions",
|
||||||
|
# audio Speech
|
||||||
|
"/audio/speech",
|
||||||
|
"/v1/audio/speech",
|
||||||
# moderations
|
# moderations
|
||||||
"/moderations",
|
"/moderations",
|
||||||
"/v1/moderations",
|
"/v1/moderations",
|
||||||
|
|
205 litellm/proxy/hooks/dynamic_rate_limiter.py (new file)
|
@ -0,0 +1,205 @@
|
||||||
|
# What is this?
|
||||||
|
## Allocates dynamic tpm/rpm quota for a project based on current traffic
|
||||||
|
## Tracks num active projects per minute
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import sys
|
||||||
|
import traceback
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import List, Literal, Optional, Tuple, Union
|
||||||
|
|
||||||
|
from fastapi import HTTPException
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm import ModelResponse, Router
|
||||||
|
from litellm._logging import verbose_proxy_logger
|
||||||
|
from litellm.caching import DualCache
|
||||||
|
from litellm.integrations.custom_logger import CustomLogger
|
||||||
|
from litellm.proxy._types import UserAPIKeyAuth
|
||||||
|
from litellm.types.router import ModelGroupInfo
|
||||||
|
from litellm.utils import get_utc_datetime
|
||||||
|
|
||||||
|
|
||||||
|
class DynamicRateLimiterCache:
|
||||||
|
"""
|
||||||
|
Thin wrapper on DualCache for this file.
|
||||||
|
|
||||||
|
Track number of active projects calling a model.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, cache: DualCache) -> None:
|
||||||
|
self.cache = cache
|
||||||
|
self.ttl = 60 # 1 min ttl
|
||||||
|
|
||||||
|
async def async_get_cache(self, model: str) -> Optional[int]:
|
||||||
|
dt = get_utc_datetime()
|
||||||
|
current_minute = dt.strftime("%H-%M")
|
||||||
|
key_name = "{}:{}".format(current_minute, model)
|
||||||
|
_response = await self.cache.async_get_cache(key=key_name)
|
||||||
|
response: Optional[int] = None
|
||||||
|
if _response is not None:
|
||||||
|
response = len(_response)
|
||||||
|
return response
|
||||||
|
|
||||||
|
async def async_set_cache_sadd(self, model: str, value: List):
|
||||||
|
"""
|
||||||
|
Add value to set.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
- model: str, the name of the model group
|
||||||
|
- value: str, the team id
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
- None
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
- Exception, if unable to connect to cache client (if redis caching enabled)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
dt = get_utc_datetime()
|
||||||
|
current_minute = dt.strftime("%H-%M")
|
||||||
|
|
||||||
|
key_name = "{}:{}".format(current_minute, model)
|
||||||
|
await self.cache.async_set_cache_sadd(
|
||||||
|
key=key_name, value=value, ttl=self.ttl
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
verbose_proxy_logger.error(
|
||||||
|
"litellm.proxy.hooks.dynamic_rate_limiter.py::async_set_cache_sadd(): Exception occured - {}\n{}".format(
|
||||||
|
str(e), traceback.format_exc()
|
||||||
|
)
|
||||||
|
)
|
||||||
|
raise e
|
||||||
|
|
||||||
|
|
||||||
|
class _PROXY_DynamicRateLimitHandler(CustomLogger):
|
||||||
|
|
||||||
|
# Class variables or attributes
|
||||||
|
def __init__(self, internal_usage_cache: DualCache):
|
||||||
|
self.internal_usage_cache = DynamicRateLimiterCache(cache=internal_usage_cache)
|
||||||
|
|
||||||
|
def update_variables(self, llm_router: Router):
|
||||||
|
self.llm_router = llm_router
|
||||||
|
|
||||||
|
async def check_available_tpm(
|
||||||
|
self, model: str
|
||||||
|
) -> Tuple[Optional[int], Optional[int], Optional[int]]:
|
||||||
|
"""
|
||||||
|
For a given model, get its available tpm
|
||||||
|
|
||||||
|
Returns
|
||||||
|
- Tuple[available_tpm, model_tpm, active_projects]
|
||||||
|
- available_tpm: int or null - always 0 or positive.
|
||||||
|
- remaining_model_tpm: int or null. If available tpm is int, then this will be too.
|
||||||
|
- active_projects: int or null
|
||||||
|
"""
|
||||||
|
active_projects = await self.internal_usage_cache.async_get_cache(model=model)
|
||||||
|
current_model_tpm: Optional[int] = await self.llm_router.get_model_group_usage(
|
||||||
|
model_group=model
|
||||||
|
)
|
||||||
|
model_group_info: Optional[ModelGroupInfo] = (
|
||||||
|
self.llm_router.get_model_group_info(model_group=model)
|
||||||
|
)
|
||||||
|
total_model_tpm: Optional[int] = None
|
||||||
|
if model_group_info is not None and model_group_info.tpm is not None:
|
||||||
|
total_model_tpm = model_group_info.tpm
|
||||||
|
|
||||||
|
remaining_model_tpm: Optional[int] = None
|
||||||
|
if total_model_tpm is not None and current_model_tpm is not None:
|
||||||
|
remaining_model_tpm = total_model_tpm - current_model_tpm
|
||||||
|
elif total_model_tpm is not None:
|
||||||
|
remaining_model_tpm = total_model_tpm
|
||||||
|
|
||||||
|
available_tpm: Optional[int] = None
|
||||||
|
|
||||||
|
if remaining_model_tpm is not None:
|
||||||
|
if active_projects is not None:
|
||||||
|
available_tpm = int(remaining_model_tpm / active_projects)
|
||||||
|
else:
|
||||||
|
available_tpm = remaining_model_tpm
|
||||||
|
|
||||||
|
if available_tpm is not None and available_tpm < 0:
|
||||||
|
available_tpm = 0
|
||||||
|
return available_tpm, remaining_model_tpm, active_projects
|
||||||
|
|
||||||
|
async def async_pre_call_hook(
|
||||||
|
self,
|
||||||
|
user_api_key_dict: UserAPIKeyAuth,
|
||||||
|
cache: DualCache,
|
||||||
|
data: dict,
|
||||||
|
call_type: Literal[
|
||||||
|
"completion",
|
||||||
|
"text_completion",
|
||||||
|
"embeddings",
|
||||||
|
"image_generation",
|
||||||
|
"moderation",
|
||||||
|
"audio_transcription",
|
||||||
|
],
|
||||||
|
) -> Optional[
|
||||||
|
Union[Exception, str, dict]
|
||||||
|
]: # raise exception if invalid, return a str for the user to receive - if rejected, or return a modified dictionary for passing into litellm
|
||||||
|
"""
|
||||||
|
- For a model group
|
||||||
|
- Check if tpm available
|
||||||
|
- Raise RateLimitError if no tpm available
|
||||||
|
"""
|
||||||
|
if "model" in data:
|
||||||
|
available_tpm, model_tpm, active_projects = await self.check_available_tpm(
|
||||||
|
model=data["model"]
|
||||||
|
)
|
||||||
|
if available_tpm is not None and available_tpm == 0:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=429,
|
||||||
|
detail={
|
||||||
|
"error": "Key={} over available TPM={}. Model TPM={}, Active keys={}".format(
|
||||||
|
user_api_key_dict.api_key,
|
||||||
|
available_tpm,
|
||||||
|
model_tpm,
|
||||||
|
active_projects,
|
||||||
|
)
|
||||||
|
},
|
||||||
|
)
|
||||||
|
elif available_tpm is not None:
|
||||||
|
## UPDATE CACHE WITH ACTIVE PROJECT
|
||||||
|
asyncio.create_task(
|
||||||
|
self.internal_usage_cache.async_set_cache_sadd( # this is a set
|
||||||
|
model=data["model"], # type: ignore
|
||||||
|
value=[user_api_key_dict.token or "default_key"],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def async_post_call_success_hook(
|
||||||
|
self, user_api_key_dict: UserAPIKeyAuth, response
|
||||||
|
):
|
||||||
|
try:
|
||||||
|
if isinstance(response, ModelResponse):
|
||||||
|
model_info = self.llm_router.get_model_info(
|
||||||
|
id=response._hidden_params["model_id"]
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
model_info is not None
|
||||||
|
), "Model info for model with id={} is None".format(
|
||||||
|
response._hidden_params["model_id"]
|
||||||
|
)
|
||||||
|
available_tpm, remaining_model_tpm, active_projects = (
|
||||||
|
await self.check_available_tpm(model=model_info["model_name"])
|
||||||
|
)
|
||||||
|
response._hidden_params["additional_headers"] = {
|
||||||
|
"x-litellm-model_group": model_info["model_name"],
|
||||||
|
"x-ratelimit-remaining-litellm-project-tokens": available_tpm,
|
||||||
|
"x-ratelimit-remaining-model-tokens": remaining_model_tpm,
|
||||||
|
"x-ratelimit-current-active-projects": active_projects,
|
||||||
|
}
|
||||||
|
|
||||||
|
return response
|
||||||
|
return await super().async_post_call_success_hook(
|
||||||
|
user_api_key_dict, response
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
verbose_proxy_logger.error(
|
||||||
|
"litellm.proxy.hooks.dynamic_rate_limiter.py::async_post_call_success_hook(): Exception occured - {}\n{}".format(
|
||||||
|
str(e), traceback.format_exc()
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return response
|
|
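The new `dynamic_rate_limiter` hook above divides whatever is left of a model group's per-minute token budget evenly across the API keys seen calling that model in the current minute, rejects a call with a 429 once a key's share is exhausted, and surfaces the numbers through `x-ratelimit-*` response headers in the success hook. A minimal standalone sketch of just the allocation math (free-standing function with illustrative names, not the hook's actual interface):

```python
from typing import Optional, Tuple

def allocate_tpm(
    total_model_tpm: Optional[int],
    used_model_tpm: Optional[int],
    active_keys: Optional[int],
) -> Tuple[Optional[int], Optional[int]]:
    """Simplified mirror of the per-minute split: divide the remaining
    token budget evenly across the keys active this minute."""
    remaining: Optional[int] = None
    if total_model_tpm is not None:
        remaining = total_model_tpm - (used_model_tpm or 0)

    available: Optional[int] = None
    if remaining is not None:
        available = remaining // active_keys if active_keys else remaining
        available = max(available, 0)  # never report a negative allowance
    return available, remaining

# a 100 TPM model with 40 tokens already used and 3 active keys -> 20 TPM per key
print(allocate_tpm(100, 40, 3))  # (20, 60)
```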
@ -433,6 +433,7 @@ def get_custom_headers(
|
||||||
version: Optional[str] = None,
|
version: Optional[str] = None,
|
||||||
model_region: Optional[str] = None,
|
model_region: Optional[str] = None,
|
||||||
fastest_response_batch_completion: Optional[bool] = None,
|
fastest_response_batch_completion: Optional[bool] = None,
|
||||||
|
**kwargs,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
exclude_values = {"", None}
|
exclude_values = {"", None}
|
||||||
headers = {
|
headers = {
|
||||||
|
@ -448,6 +449,7 @@ def get_custom_headers(
|
||||||
if fastest_response_batch_completion is not None
|
if fastest_response_batch_completion is not None
|
||||||
else None
|
else None
|
||||||
),
|
),
|
||||||
|
**{k: str(v) for k, v in kwargs.items()},
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
return {
|
return {
|
||||||
|
@ -2524,11 +2526,10 @@ async def async_data_generator(
|
||||||
yield f"data: {done_message}\n\n"
|
yield f"data: {done_message}\n\n"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.error(
|
verbose_proxy_logger.error(
|
||||||
"litellm.proxy.proxy_server.async_data_generator(): Exception occured - {}".format(
|
"litellm.proxy.proxy_server.async_data_generator(): Exception occured - {}\n{}".format(
|
||||||
str(e)
|
str(e), traceback.format_exc()
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
verbose_proxy_logger.debug(traceback.format_exc())
|
|
||||||
await proxy_logging_obj.post_call_failure_hook(
|
await proxy_logging_obj.post_call_failure_hook(
|
||||||
user_api_key_dict=user_api_key_dict,
|
user_api_key_dict=user_api_key_dict,
|
||||||
original_exception=e,
|
original_exception=e,
|
||||||
|
@ -2644,7 +2645,9 @@ async def startup_event():
|
||||||
redis_cache=redis_usage_cache
|
redis_cache=redis_usage_cache
|
||||||
) # used by parallel request limiter for rate limiting keys across instances
|
) # used by parallel request limiter for rate limiting keys across instances
|
||||||
|
|
||||||
proxy_logging_obj._init_litellm_callbacks() # INITIALIZE LITELLM CALLBACKS ON SERVER STARTUP <- do this to catch any logging errors on startup, not when calls are being made
|
proxy_logging_obj._init_litellm_callbacks(
|
||||||
|
llm_router=llm_router
|
||||||
|
) # INITIALIZE LITELLM CALLBACKS ON SERVER STARTUP <- do this to catch any logging errors on startup, not when calls are being made
|
||||||
|
|
||||||
if "daily_reports" in proxy_logging_obj.slack_alerting_instance.alert_types:
|
if "daily_reports" in proxy_logging_obj.slack_alerting_instance.alert_types:
|
||||||
asyncio.create_task(
|
asyncio.create_task(
|
||||||
|
@ -3061,6 +3064,14 @@ async def chat_completion(
|
||||||
headers=custom_headers,
|
headers=custom_headers,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
### CALL HOOKS ### - modify outgoing data
|
||||||
|
response = await proxy_logging_obj.post_call_success_hook(
|
||||||
|
user_api_key_dict=user_api_key_dict, response=response
|
||||||
|
)
|
||||||
|
|
||||||
|
hidden_params = getattr(response, "_hidden_params", {}) or {}
|
||||||
|
additional_headers: dict = hidden_params.get("additional_headers", {}) or {}
|
||||||
|
|
||||||
fastapi_response.headers.update(
|
fastapi_response.headers.update(
|
||||||
get_custom_headers(
|
get_custom_headers(
|
||||||
user_api_key_dict=user_api_key_dict,
|
user_api_key_dict=user_api_key_dict,
|
||||||
|
@ -3070,14 +3081,10 @@ async def chat_completion(
|
||||||
version=version,
|
version=version,
|
||||||
model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
|
model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
|
||||||
fastest_response_batch_completion=fastest_response_batch_completion,
|
fastest_response_batch_completion=fastest_response_batch_completion,
|
||||||
|
**additional_headers,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
### CALL HOOKS ### - modify outgoing data
|
|
||||||
response = await proxy_logging_obj.post_call_success_hook(
|
|
||||||
user_api_key_dict=user_api_key_dict, response=response
|
|
||||||
)
|
|
||||||
|
|
||||||
return response
|
return response
|
||||||
except RejectedRequestError as e:
|
except RejectedRequestError as e:
|
||||||
_data = e.request_data
|
_data = e.request_data
|
||||||
|
@ -3116,11 +3123,10 @@ async def chat_completion(
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
data["litellm_status"] = "fail" # used for alerting
|
data["litellm_status"] = "fail" # used for alerting
|
||||||
verbose_proxy_logger.error(
|
verbose_proxy_logger.error(
|
||||||
"litellm.proxy.proxy_server.chat_completion(): Exception occured - {}".format(
|
"litellm.proxy.proxy_server.chat_completion(): Exception occured - {}\n{}".format(
|
||||||
get_error_message_str(e=e)
|
get_error_message_str(e=e), traceback.format_exc()
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
verbose_proxy_logger.debug(traceback.format_exc())
|
|
||||||
await proxy_logging_obj.post_call_failure_hook(
|
await proxy_logging_obj.post_call_failure_hook(
|
||||||
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
|
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
|
||||||
)
|
)
|
||||||
|
@ -7502,6 +7508,12 @@ async def login(request: Request):
|
||||||
litellm_dashboard_ui += "/ui/"
|
litellm_dashboard_ui += "/ui/"
|
||||||
import jwt
|
import jwt
|
||||||
|
|
||||||
|
if litellm_master_key_hash is None:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=500,
|
||||||
|
detail={"error": "No master key set, please set LITELLM_MASTER_KEY"},
|
||||||
|
)
|
||||||
|
|
||||||
jwt_token = jwt.encode(
|
jwt_token = jwt.encode(
|
||||||
{
|
{
|
||||||
"user_id": user_id,
|
"user_id": user_id,
|
||||||
|
@ -7511,11 +7523,13 @@ async def login(request: Request):
|
||||||
"login_method": "username_password",
|
"login_method": "username_password",
|
||||||
"premium_user": premium_user,
|
"premium_user": premium_user,
|
||||||
},
|
},
|
||||||
"secret",
|
litellm_master_key_hash,
|
||||||
algorithm="HS256",
|
algorithm="HS256",
|
||||||
)
|
)
|
||||||
litellm_dashboard_ui += "?userID=" + user_id + "&token=" + jwt_token
|
litellm_dashboard_ui += "?userID=" + user_id
|
||||||
return RedirectResponse(url=litellm_dashboard_ui, status_code=303)
|
redirect_response = RedirectResponse(url=litellm_dashboard_ui, status_code=303)
|
||||||
|
redirect_response.set_cookie(key="token", value=jwt_token)
|
||||||
|
return redirect_response
|
||||||
elif _user_row is not None:
|
elif _user_row is not None:
|
||||||
"""
|
"""
|
||||||
When sharing invite links
|
When sharing invite links
|
||||||
|
@ -7564,6 +7578,14 @@ async def login(request: Request):
|
||||||
litellm_dashboard_ui += "/ui/"
|
litellm_dashboard_ui += "/ui/"
|
||||||
import jwt
|
import jwt
|
||||||
|
|
||||||
|
if litellm_master_key_hash is None:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=500,
|
||||||
|
detail={
|
||||||
|
"error": "No master key set, please set LITELLM_MASTER_KEY"
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
jwt_token = jwt.encode(
|
jwt_token = jwt.encode(
|
||||||
{
|
{
|
||||||
"user_id": user_id,
|
"user_id": user_id,
|
||||||
|
@ -7573,11 +7595,15 @@ async def login(request: Request):
|
||||||
"login_method": "username_password",
|
"login_method": "username_password",
|
||||||
"premium_user": premium_user,
|
"premium_user": premium_user,
|
||||||
},
|
},
|
||||||
"secret",
|
litellm_master_key_hash,
|
||||||
algorithm="HS256",
|
algorithm="HS256",
|
||||||
)
|
)
|
||||||
litellm_dashboard_ui += "?userID=" + user_id + "&token=" + jwt_token
|
litellm_dashboard_ui += "?userID=" + user_id
|
||||||
return RedirectResponse(url=litellm_dashboard_ui, status_code=303)
|
redirect_response = RedirectResponse(
|
||||||
|
url=litellm_dashboard_ui, status_code=303
|
||||||
|
)
|
||||||
|
redirect_response.set_cookie(key="token", value=jwt_token)
|
||||||
|
return redirect_response
|
||||||
else:
|
else:
|
||||||
raise ProxyException(
|
raise ProxyException(
|
||||||
message=f"Invalid credentials used to access UI. Passed in username: {username}, passed in password: {password}.\nNot valid credentials for {username}",
|
message=f"Invalid credentials used to access UI. Passed in username: {username}, passed in password: {password}.\nNot valid credentials for {username}",
|
||||||
|
@ -7688,6 +7714,12 @@ async def onboarding(invite_link: str):
|
||||||
litellm_dashboard_ui += "/ui/onboarding"
|
litellm_dashboard_ui += "/ui/onboarding"
|
||||||
import jwt
|
import jwt
|
||||||
|
|
||||||
|
if litellm_master_key_hash is None:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=500,
|
||||||
|
detail={"error": "No master key set, please set LITELLM_MASTER_KEY"},
|
||||||
|
)
|
||||||
|
|
||||||
jwt_token = jwt.encode(
|
jwt_token = jwt.encode(
|
||||||
{
|
{
|
||||||
"user_id": user_obj.user_id,
|
"user_id": user_obj.user_id,
|
||||||
|
@ -7697,7 +7729,7 @@ async def onboarding(invite_link: str):
|
||||||
"login_method": "username_password",
|
"login_method": "username_password",
|
||||||
"premium_user": premium_user,
|
"premium_user": premium_user,
|
||||||
},
|
},
|
||||||
"secret",
|
litellm_master_key_hash,
|
||||||
algorithm="HS256",
|
algorithm="HS256",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -8108,6 +8140,12 @@ async def auth_callback(request: Request):
|
||||||
|
|
||||||
import jwt
|
import jwt
|
||||||
|
|
||||||
|
if litellm_master_key_hash is None:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=500,
|
||||||
|
detail={"error": "No master key set, please set LITELLM_MASTER_KEY"},
|
||||||
|
)
|
||||||
|
|
||||||
jwt_token = jwt.encode(
|
jwt_token = jwt.encode(
|
||||||
{
|
{
|
||||||
"user_id": user_id,
|
"user_id": user_id,
|
||||||
|
@ -8117,11 +8155,13 @@ async def auth_callback(request: Request):
|
||||||
"login_method": "sso",
|
"login_method": "sso",
|
||||||
"premium_user": premium_user,
|
"premium_user": premium_user,
|
||||||
},
|
},
|
||||||
"secret",
|
litellm_master_key_hash,
|
||||||
algorithm="HS256",
|
algorithm="HS256",
|
||||||
)
|
)
|
||||||
litellm_dashboard_ui += "?userID=" + user_id + "&token=" + jwt_token
|
litellm_dashboard_ui += "?userID=" + user_id
|
||||||
return RedirectResponse(url=litellm_dashboard_ui)
|
redirect_response = RedirectResponse(url=litellm_dashboard_ui, status_code=303)
|
||||||
|
redirect_response.set_cookie(key="token", value=jwt_token)
|
||||||
|
return redirect_response
|
||||||
|
|
||||||
|
|
||||||
#### INVITATION MANAGEMENT ####
|
#### INVITATION MANAGEMENT ####
|
||||||
|
|
|
@ -229,31 +229,32 @@ class ProxyLogging:
|
||||||
if redis_cache is not None:
|
if redis_cache is not None:
|
||||||
self.internal_usage_cache.redis_cache = redis_cache
|
self.internal_usage_cache.redis_cache = redis_cache
|
||||||
|
|
||||||
def _init_litellm_callbacks(self):
|
def _init_litellm_callbacks(self, llm_router: Optional[litellm.Router] = None):
|
||||||
print_verbose("INITIALIZING LITELLM CALLBACKS!")
|
|
||||||
self.service_logging_obj = ServiceLogging()
|
self.service_logging_obj = ServiceLogging()
|
||||||
litellm.callbacks.append(self.max_parallel_request_limiter)
|
litellm.callbacks.append(self.max_parallel_request_limiter) # type: ignore
|
||||||
litellm.callbacks.append(self.max_budget_limiter)
|
litellm.callbacks.append(self.max_budget_limiter) # type: ignore
|
||||||
litellm.callbacks.append(self.cache_control_check)
|
litellm.callbacks.append(self.cache_control_check) # type: ignore
|
||||||
litellm.callbacks.append(self.service_logging_obj)
|
litellm.callbacks.append(self.service_logging_obj) # type: ignore
|
||||||
litellm.success_callback.append(
|
litellm.success_callback.append(
|
||||||
self.slack_alerting_instance.response_taking_too_long_callback
|
self.slack_alerting_instance.response_taking_too_long_callback
|
||||||
)
|
)
|
||||||
for callback in litellm.callbacks:
|
for callback in litellm.callbacks:
|
||||||
if isinstance(callback, str):
|
if isinstance(callback, str):
|
||||||
callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(
|
callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class( # type: ignore
|
||||||
callback
|
callback,
|
||||||
|
internal_usage_cache=self.internal_usage_cache,
|
||||||
|
llm_router=llm_router,
|
||||||
)
|
)
|
||||||
if callback not in litellm.input_callback:
|
if callback not in litellm.input_callback:
|
||||||
litellm.input_callback.append(callback)
|
litellm.input_callback.append(callback) # type: ignore
|
||||||
if callback not in litellm.success_callback:
|
if callback not in litellm.success_callback:
|
||||||
litellm.success_callback.append(callback)
|
litellm.success_callback.append(callback) # type: ignore
|
||||||
if callback not in litellm.failure_callback:
|
if callback not in litellm.failure_callback:
|
||||||
litellm.failure_callback.append(callback)
|
litellm.failure_callback.append(callback) # type: ignore
|
||||||
if callback not in litellm._async_success_callback:
|
if callback not in litellm._async_success_callback:
|
||||||
litellm._async_success_callback.append(callback)
|
litellm._async_success_callback.append(callback) # type: ignore
|
||||||
if callback not in litellm._async_failure_callback:
|
if callback not in litellm._async_failure_callback:
|
||||||
litellm._async_failure_callback.append(callback)
|
litellm._async_failure_callback.append(callback) # type: ignore
|
||||||
|
|
||||||
if (
|
if (
|
||||||
len(litellm.input_callback) > 0
|
len(litellm.input_callback) > 0
|
||||||
|
@ -301,10 +302,19 @@ class ProxyLogging:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
for callback in litellm.callbacks:
|
for callback in litellm.callbacks:
|
||||||
if isinstance(callback, CustomLogger) and "async_pre_call_hook" in vars(
|
_callback: Optional[CustomLogger] = None
|
||||||
callback.__class__
|
if isinstance(callback, str):
|
||||||
|
_callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
|
||||||
|
callback
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
_callback = callback # type: ignore
|
||||||
|
if (
|
||||||
|
_callback is not None
|
||||||
|
and isinstance(_callback, CustomLogger)
|
||||||
|
and "async_pre_call_hook" in vars(_callback.__class__)
|
||||||
):
|
):
|
||||||
response = await callback.async_pre_call_hook(
|
response = await _callback.async_pre_call_hook(
|
||||||
user_api_key_dict=user_api_key_dict,
|
user_api_key_dict=user_api_key_dict,
|
||||||
cache=self.call_details["user_api_key_cache"],
|
cache=self.call_details["user_api_key_cache"],
|
||||||
data=data,
|
data=data,
|
||||||
|
@ -574,8 +584,15 @@ class ProxyLogging:
|
||||||
|
|
||||||
for callback in litellm.callbacks:
|
for callback in litellm.callbacks:
|
||||||
try:
|
try:
|
||||||
if isinstance(callback, CustomLogger):
|
_callback: Optional[CustomLogger] = None
|
||||||
await callback.async_post_call_failure_hook(
|
if isinstance(callback, str):
|
||||||
|
_callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
|
||||||
|
callback
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
_callback = callback # type: ignore
|
||||||
|
if _callback is not None and isinstance(_callback, CustomLogger):
|
||||||
|
await _callback.async_post_call_failure_hook(
|
||||||
user_api_key_dict=user_api_key_dict,
|
user_api_key_dict=user_api_key_dict,
|
||||||
original_exception=original_exception,
|
original_exception=original_exception,
|
||||||
)
|
)
|
||||||
|
@ -596,8 +613,15 @@ class ProxyLogging:
|
||||||
"""
|
"""
|
||||||
for callback in litellm.callbacks:
|
for callback in litellm.callbacks:
|
||||||
try:
|
try:
|
||||||
if isinstance(callback, CustomLogger):
|
_callback: Optional[CustomLogger] = None
|
||||||
await callback.async_post_call_success_hook(
|
if isinstance(callback, str):
|
||||||
|
_callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
|
||||||
|
callback
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
_callback = callback # type: ignore
|
||||||
|
if _callback is not None and isinstance(_callback, CustomLogger):
|
||||||
|
await _callback.async_post_call_success_hook(
|
||||||
user_api_key_dict=user_api_key_dict, response=response
|
user_api_key_dict=user_api_key_dict, response=response
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -615,14 +639,25 @@ class ProxyLogging:
|
||||||
Covers:
|
Covers:
|
||||||
1. /chat/completions
|
1. /chat/completions
|
||||||
"""
|
"""
|
||||||
for callback in litellm.callbacks:
|
response_str: Optional[str] = None
|
||||||
try:
|
if isinstance(response, ModelResponse):
|
||||||
if isinstance(callback, CustomLogger):
|
response_str = litellm.get_response_string(response_obj=response)
|
||||||
await callback.async_post_call_streaming_hook(
|
if response_str is not None:
|
||||||
user_api_key_dict=user_api_key_dict, response=response
|
for callback in litellm.callbacks:
|
||||||
)
|
try:
|
||||||
except Exception as e:
|
_callback: Optional[CustomLogger] = None
|
||||||
raise e
|
if isinstance(callback, str):
|
||||||
|
_callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
|
||||||
|
callback
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
_callback = callback # type: ignore
|
||||||
|
if _callback is not None and isinstance(_callback, CustomLogger):
|
||||||
|
await _callback.async_post_call_streaming_hook(
|
||||||
|
user_api_key_dict=user_api_key_dict, response=response_str
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
raise e
|
||||||
return response
|
return response
|
||||||
|
|
||||||
async def post_call_streaming_hook(
|
async def post_call_streaming_hook(
|
||||||
|
|
|
@ -11,6 +11,7 @@ import asyncio
|
||||||
import concurrent
|
import concurrent
|
||||||
import copy
|
import copy
|
||||||
import datetime as datetime_og
|
import datetime as datetime_og
|
||||||
|
import enum
|
||||||
import hashlib
|
import hashlib
|
||||||
import inspect
|
import inspect
|
||||||
import json
|
import json
|
||||||
|
@ -90,6 +91,10 @@ from litellm.utils import (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RoutingArgs(enum.Enum):
|
||||||
|
ttl = 60 # 1min (RPM/TPM expire key)
|
||||||
|
|
||||||
|
|
||||||
class Router:
|
class Router:
|
||||||
model_names: List = []
|
model_names: List = []
|
||||||
cache_responses: Optional[bool] = False
|
cache_responses: Optional[bool] = False
|
||||||
|
@ -387,6 +392,11 @@ class Router:
|
||||||
routing_strategy=routing_strategy,
|
routing_strategy=routing_strategy,
|
||||||
routing_strategy_args=routing_strategy_args,
|
routing_strategy_args=routing_strategy_args,
|
||||||
)
|
)
|
||||||
|
## USAGE TRACKING ##
|
||||||
|
if isinstance(litellm._async_success_callback, list):
|
||||||
|
litellm._async_success_callback.append(self.deployment_callback_on_success)
|
||||||
|
else:
|
||||||
|
litellm._async_success_callback.append(self.deployment_callback_on_success)
|
||||||
## COOLDOWNS ##
|
## COOLDOWNS ##
|
||||||
if isinstance(litellm.failure_callback, list):
|
if isinstance(litellm.failure_callback, list):
|
||||||
litellm.failure_callback.append(self.deployment_callback_on_failure)
|
litellm.failure_callback.append(self.deployment_callback_on_failure)
|
||||||
|
@ -2664,13 +2674,69 @@ class Router:
|
||||||
time.sleep(_timeout)
|
time.sleep(_timeout)
|
||||||
|
|
||||||
if type(original_exception) in litellm.LITELLM_EXCEPTION_TYPES:
|
if type(original_exception) in litellm.LITELLM_EXCEPTION_TYPES:
|
||||||
original_exception.max_retries = num_retries
|
setattr(original_exception, "max_retries", num_retries)
|
||||||
original_exception.num_retries = current_attempt
|
setattr(original_exception, "num_retries", current_attempt)
|
||||||
|
|
||||||
raise original_exception
|
raise original_exception
|
||||||
|
|
||||||
### HELPER FUNCTIONS
|
### HELPER FUNCTIONS
|
||||||
|
|
||||||
|
async def deployment_callback_on_success(
|
||||||
|
self,
|
||||||
|
kwargs, # kwargs to completion
|
||||||
|
completion_response, # response from completion
|
||||||
|
start_time,
|
||||||
|
end_time, # start/end time
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Track remaining tpm/rpm quota for model in model_list
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
"""
|
||||||
|
Update TPM usage on success
|
||||||
|
"""
|
||||||
|
if kwargs["litellm_params"].get("metadata") is None:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
model_group = kwargs["litellm_params"]["metadata"].get(
|
||||||
|
"model_group", None
|
||||||
|
)
|
||||||
|
|
||||||
|
id = kwargs["litellm_params"].get("model_info", {}).get("id", None)
|
||||||
|
if model_group is None or id is None:
|
||||||
|
return
|
||||||
|
elif isinstance(id, int):
|
||||||
|
id = str(id)
|
||||||
|
|
||||||
|
total_tokens = completion_response["usage"]["total_tokens"]
|
||||||
|
|
||||||
|
# ------------
|
||||||
|
# Setup values
|
||||||
|
# ------------
|
||||||
|
dt = get_utc_datetime()
|
||||||
|
current_minute = dt.strftime(
|
||||||
|
"%H-%M"
|
||||||
|
) # use the same timezone regardless of system clock
|
||||||
|
|
||||||
|
tpm_key = f"global_router:{id}:tpm:{current_minute}"
|
||||||
|
# ------------
|
||||||
|
# Update usage
|
||||||
|
# ------------
|
||||||
|
# update cache
|
||||||
|
|
||||||
|
## TPM
|
||||||
|
await self.cache.async_increment_cache(
|
||||||
|
key=tpm_key, value=total_tokens, ttl=RoutingArgs.ttl.value
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
verbose_router_logger.error(
|
||||||
|
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}\n{}".format(
|
||||||
|
str(e), traceback.format_exc()
|
||||||
|
)
|
||||||
|
)
|
||||||
|
pass
|
||||||
|
|
||||||
def deployment_callback_on_failure(
|
def deployment_callback_on_failure(
|
||||||
self,
|
self,
|
||||||
kwargs, # kwargs to completion
|
kwargs, # kwargs to completion
|
||||||
|
@ -3870,10 +3936,39 @@ class Router:
|
||||||
|
|
||||||
model_group_info: Optional[ModelGroupInfo] = None
|
model_group_info: Optional[ModelGroupInfo] = None
|
||||||
|
|
||||||
|
total_tpm: Optional[int] = None
|
||||||
|
total_rpm: Optional[int] = None
|
||||||
|
|
||||||
for model in self.model_list:
|
for model in self.model_list:
|
||||||
if "model_name" in model and model["model_name"] == model_group:
|
if "model_name" in model and model["model_name"] == model_group:
|
||||||
# model in model group found #
|
# model in model group found #
|
||||||
litellm_params = LiteLLM_Params(**model["litellm_params"])
|
litellm_params = LiteLLM_Params(**model["litellm_params"])
|
||||||
|
# get model tpm
|
||||||
|
_deployment_tpm: Optional[int] = None
|
||||||
|
if _deployment_tpm is None:
|
||||||
|
_deployment_tpm = model.get("tpm", None)
|
||||||
|
if _deployment_tpm is None:
|
||||||
|
_deployment_tpm = model.get("litellm_params", {}).get("tpm", None)
|
||||||
|
if _deployment_tpm is None:
|
||||||
|
_deployment_tpm = model.get("model_info", {}).get("tpm", None)
|
||||||
|
|
||||||
|
if _deployment_tpm is not None:
|
||||||
|
if total_tpm is None:
|
||||||
|
total_tpm = 0
|
||||||
|
total_tpm += _deployment_tpm # type: ignore
|
||||||
|
# get model rpm
|
||||||
|
_deployment_rpm: Optional[int] = None
|
||||||
|
if _deployment_rpm is None:
|
||||||
|
_deployment_rpm = model.get("rpm", None)
|
||||||
|
if _deployment_rpm is None:
|
||||||
|
_deployment_rpm = model.get("litellm_params", {}).get("rpm", None)
|
||||||
|
if _deployment_rpm is None:
|
||||||
|
_deployment_rpm = model.get("model_info", {}).get("rpm", None)
|
||||||
|
|
||||||
|
if _deployment_rpm is not None:
|
||||||
|
if total_rpm is None:
|
||||||
|
total_rpm = 0
|
||||||
|
total_rpm += _deployment_rpm # type: ignore
|
||||||
# get model info
|
# get model info
|
||||||
try:
|
try:
|
||||||
model_info = litellm.get_model_info(model=litellm_params.model)
|
model_info = litellm.get_model_info(model=litellm_params.model)
|
||||||
|
@ -3987,8 +4082,44 @@ class Router:
|
||||||
"supported_openai_params"
|
"supported_openai_params"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
## UPDATE WITH TOTAL TPM/RPM FOR MODEL GROUP
|
||||||
|
if total_tpm is not None and model_group_info is not None:
|
||||||
|
model_group_info.tpm = total_tpm
|
||||||
|
|
||||||
|
if total_rpm is not None and model_group_info is not None:
|
||||||
|
model_group_info.rpm = total_rpm
|
||||||
|
|
||||||
return model_group_info
|
return model_group_info
|
||||||
|
|
||||||
|
async def get_model_group_usage(self, model_group: str) -> Optional[int]:
|
||||||
|
"""
|
||||||
|
Returns remaining tpm quota for model group
|
||||||
|
"""
|
||||||
|
dt = get_utc_datetime()
|
||||||
|
current_minute = dt.strftime(
|
||||||
|
"%H-%M"
|
||||||
|
) # use the same timezone regardless of system clock
|
||||||
|
tpm_keys: List[str] = []
|
||||||
|
for model in self.model_list:
|
||||||
|
if "model_name" in model and model["model_name"] == model_group:
|
||||||
|
tpm_keys.append(
|
||||||
|
f"global_router:{model['model_info']['id']}:tpm:{current_minute}"
|
||||||
|
)
|
||||||
|
|
||||||
|
## TPM
|
||||||
|
tpm_usage_list: Optional[List] = await self.cache.async_batch_get_cache(
|
||||||
|
keys=tpm_keys
|
||||||
|
)
|
||||||
|
tpm_usage: Optional[int] = None
|
||||||
|
if tpm_usage_list is not None:
|
||||||
|
for t in tpm_usage_list:
|
||||||
|
if isinstance(t, int):
|
||||||
|
if tpm_usage is None:
|
||||||
|
tpm_usage = 0
|
||||||
|
tpm_usage += t
|
||||||
|
|
||||||
|
return tpm_usage
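`get_model_group_usage()` above reads back the per-minute counters that `deployment_callback_on_success()` increments under `global_router:{model_id}:tpm:{HH-MM}` keys. A small standalone sketch of that bookkeeping, using a plain dict in place of the router's `DualCache` (illustrative only):

```python
from collections import defaultdict
from datetime import datetime, timezone

usage: dict = defaultdict(int)  # stand-in for the router's cache

def _minute_key(model_id: str) -> str:
    # same "%H-%M" bucketing as the router, always computed in UTC
    return f"global_router:{model_id}:tpm:{datetime.now(timezone.utc).strftime('%H-%M')}"

def record_success(model_id: str, total_tokens: int) -> None:
    # mimic the success callback: add this call's tokens to the current bucket
    usage[_minute_key(model_id)] += total_tokens

def group_usage(model_ids: list) -> int:
    # mimic the usage lookup: sum this minute's buckets across deployments
    return sum(usage[_minute_key(m)] for m in model_ids)

record_success("deployment-1", 30)
record_success("deployment-2", 45)
print(group_usage(["deployment-1", "deployment-2"]))  # 75
```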
|
||||||
|
|
||||||
def get_model_ids(self) -> List[str]:
|
def get_model_ids(self) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Returns list of model id's.
|
Returns list of model id's.
|
||||||
|
@ -4916,7 +5047,7 @@ class Router:
|
||||||
def reset(self):
|
def reset(self):
|
||||||
## clean up on close
|
## clean up on close
|
||||||
litellm.success_callback = []
|
litellm.success_callback = []
|
||||||
litellm.__async_success_callback = []
|
litellm._async_success_callback = []
|
||||||
litellm.failure_callback = []
|
litellm.failure_callback = []
|
||||||
litellm._async_failure_callback = []
|
litellm._async_failure_callback = []
|
||||||
self.retry_policy = None
|
self.retry_policy = None
|
||||||
|
|
|
@ -4,6 +4,7 @@ import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
from typing import Any
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
@ -24,11 +25,21 @@ import pytest
|
||||||
def langfuse_client():
|
def langfuse_client():
|
||||||
import langfuse
|
import langfuse
|
||||||
|
|
||||||
langfuse_client = langfuse.Langfuse(
|
_langfuse_cache_key = (
|
||||||
public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
|
f"{os.environ['LANGFUSE_PUBLIC_KEY']}-{os.environ['LANGFUSE_SECRET_KEY']}"
|
||||||
secret_key=os.environ["LANGFUSE_SECRET_KEY"],
|
|
||||||
host=None,
|
|
||||||
)
|
)
|
||||||
|
# use a in memory langfuse client for testing, RAM util on ci/cd gets too high when we init many langfuse clients
|
||||||
|
if _langfuse_cache_key in litellm.in_memory_llm_clients_cache:
|
||||||
|
langfuse_client = litellm.in_memory_llm_clients_cache[_langfuse_cache_key]
|
||||||
|
else:
|
||||||
|
langfuse_client = langfuse.Langfuse(
|
||||||
|
public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
|
||||||
|
secret_key=os.environ["LANGFUSE_SECRET_KEY"],
|
||||||
|
host=None,
|
||||||
|
)
|
||||||
|
litellm.in_memory_llm_clients_cache[_langfuse_cache_key] = langfuse_client
|
||||||
|
|
||||||
|
print("NEW LANGFUSE CLIENT")
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"langfuse.Langfuse", MagicMock(return_value=langfuse_client)
|
"langfuse.Langfuse", MagicMock(return_value=langfuse_client)
|
||||||
|
|
|
@ -1,869 +0,0 @@
|
||||||
import asyncio
|
|
||||||
import copy
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
|
||||||
sys.path.insert(0, os.path.abspath("../.."))
|
|
||||||
|
|
||||||
import litellm
|
|
||||||
from litellm import completion
|
|
||||||
|
|
||||||
litellm.num_retries = 3
|
|
||||||
litellm.success_callback = ["langfuse"]
|
|
||||||
os.environ["LANGFUSE_DEBUG"] = "True"
|
|
||||||
import time
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def langfuse_client():
|
|
||||||
import langfuse
|
|
||||||
|
|
||||||
langfuse_client = langfuse.Langfuse(
|
|
||||||
public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
|
|
||||||
secret_key=os.environ["LANGFUSE_SECRET_KEY"],
|
|
||||||
host=None,
|
|
||||||
)
|
|
||||||
|
|
||||||
with patch(
|
|
||||||
"langfuse.Langfuse", MagicMock(return_value=langfuse_client)
|
|
||||||
) as mock_langfuse_client:
|
|
||||||
yield mock_langfuse_client()
|
|
||||||
|
|
||||||
|
|
||||||
def search_logs(log_file_path, num_good_logs=1):
|
|
||||||
"""
|
|
||||||
Searches the given log file for logs containing the "/api/public" string.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- log_file_path (str): The path to the log file to be searched.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- None
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
- Exception: If there are any bad logs found in the log file.
|
|
||||||
"""
|
|
||||||
import re
|
|
||||||
|
|
||||||
print("\n searching logs")
|
|
||||||
bad_logs = []
|
|
||||||
good_logs = []
|
|
||||||
all_logs = []
|
|
||||||
try:
|
|
||||||
with open(log_file_path, "r") as log_file:
|
|
||||||
lines = log_file.readlines()
|
|
||||||
print(f"searching logslines: {lines}")
|
|
||||||
for line in lines:
|
|
||||||
all_logs.append(line.strip())
|
|
||||||
if "/api/public" in line:
|
|
||||||
print("Found log with /api/public:")
|
|
||||||
print(line.strip())
|
|
||||||
print("\n\n")
|
|
||||||
match = re.search(
|
|
||||||
r'"POST /api/public/ingestion HTTP/1.1" (\d+) (\d+)',
|
|
||||||
line,
|
|
||||||
)
|
|
||||||
if match:
|
|
||||||
status_code = int(match.group(1))
|
|
||||||
print("STATUS CODE", status_code)
|
|
||||||
if (
|
|
||||||
status_code != 200
|
|
||||||
and status_code != 201
|
|
||||||
and status_code != 207
|
|
||||||
):
|
|
||||||
print("got a BAD log")
|
|
||||||
bad_logs.append(line.strip())
|
|
||||||
else:
|
|
||||||
good_logs.append(line.strip())
|
|
||||||
print("\nBad Logs")
|
|
||||||
print(bad_logs)
|
|
||||||
if len(bad_logs) > 0:
|
|
||||||
raise Exception(f"bad logs, Bad logs = {bad_logs}")
|
|
||||||
assert (
|
|
||||||
len(good_logs) == num_good_logs
|
|
||||||
), f"Did not get expected number of good logs, expected {num_good_logs}, got {len(good_logs)}. All logs \n {all_logs}"
|
|
||||||
print("\nGood Logs")
|
|
||||||
print(good_logs)
|
|
||||||
if len(good_logs) <= 0:
|
|
||||||
raise Exception(
|
|
||||||
f"There were no Good Logs from Langfuse. No logs with /api/public status 200. \nAll logs:{all_logs}"
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
raise e
|
|
||||||
|
|
||||||
|
|
||||||
def pre_langfuse_setup():
|
|
||||||
"""
|
|
||||||
Set up the logging for the 'pre_langfuse_setup' function.
|
|
||||||
"""
|
|
||||||
# sends logs to langfuse.log
|
|
||||||
import logging
|
|
||||||
|
|
||||||
# Configure the logging to write to a file
|
|
||||||
logging.basicConfig(filename="langfuse.log", level=logging.DEBUG)
|
|
||||||
logger = logging.getLogger()
|
|
||||||
|
|
||||||
# Add a FileHandler to the logger
|
|
||||||
file_handler = logging.FileHandler("langfuse.log", mode="w")
|
|
||||||
file_handler.setLevel(logging.DEBUG)
|
|
||||||
logger.addHandler(file_handler)
|
|
||||||
return
|
|
||||||
|
|
||||||
|
|
||||||
def test_langfuse_logging_async():
|
|
||||||
# this tests time added to make langfuse logging calls, vs just acompletion calls
|
|
||||||
try:
|
|
||||||
pre_langfuse_setup()
|
|
||||||
litellm.set_verbose = True
|
|
||||||
|
|
||||||
# Make 5 calls with an empty success_callback
|
|
||||||
litellm.success_callback = []
|
|
||||||
start_time_empty_callback = asyncio.run(make_async_calls())
|
|
||||||
print("done with no callback test")
|
|
||||||
|
|
||||||
print("starting langfuse test")
|
|
||||||
# Make 5 calls with success_callback set to "langfuse"
|
|
||||||
litellm.success_callback = ["langfuse"]
|
|
||||||
start_time_langfuse = asyncio.run(make_async_calls())
|
|
||||||
print("done with langfuse test")
|
|
||||||
|
|
||||||
# Compare the time for both scenarios
|
|
||||||
print(f"Time taken with success_callback='langfuse': {start_time_langfuse}")
|
|
||||||
print(f"Time taken with empty success_callback: {start_time_empty_callback}")
|
|
||||||
|
|
||||||
# assert the diff is not more than 1 second - this was 5 seconds before the fix
|
|
||||||
assert abs(start_time_langfuse - start_time_empty_callback) < 1
|
|
||||||
|
|
||||||
except litellm.Timeout as e:
|
|
||||||
pass
|
|
||||||
except Exception as e:
|
|
||||||
pytest.fail(f"An exception occurred - {e}")
|
|
||||||
|
|
||||||
|
|
||||||
async def make_async_calls(metadata=None, **completion_kwargs):
    tasks = []
    for _ in range(5):
        tasks.append(create_async_task())

    # Measure the start time before running the tasks
    start_time = asyncio.get_event_loop().time()

    # Wait for all tasks to complete
    responses = await asyncio.gather(*tasks)

    # Print the responses when tasks return
    for idx, response in enumerate(responses):
        print(f"Response from Task {idx + 1}: {response}")

    # Calculate the total time taken
    total_time = asyncio.get_event_loop().time() - start_time

    return total_time


def create_async_task(**completion_kwargs):
    """
    Creates an async task for the litellm.acompletion function.
    This is just the task, but it is not run here.
    To run the task it must be awaited or used in other asyncio coroutine execution functions like asyncio.gather.
    Any kwargs passed to this function will be passed to the litellm.acompletion function.
    By default a standard set of arguments are used for the litellm.acompletion function.
    """
    completion_args = {
        "model": "azure/chatgpt-v-2",
        "api_version": "2024-02-01",
        "messages": [{"role": "user", "content": "This is a test"}],
        "max_tokens": 5,
        "temperature": 0.7,
        "timeout": 5,
        "user": "langfuse_latency_test_user",
        "mock_response": "It's simple to use and easy to get started",
    }
    completion_args.update(completion_kwargs)
    return asyncio.create_task(litellm.acompletion(**completion_args))


@pytest.mark.asyncio
@pytest.mark.parametrize("stream", [False, True])
async def test_langfuse_logging_without_request_response(stream, langfuse_client):
    try:
        import uuid

        _unique_trace_name = f"litellm-test-{str(uuid.uuid4())}"
        litellm.set_verbose = True
        litellm.turn_off_message_logging = True
        litellm.success_callback = ["langfuse"]
        response = await create_async_task(
            model="gpt-3.5-turbo",
            stream=stream,
            metadata={"trace_id": _unique_trace_name},
        )
        print(response)
        if stream:
            async for chunk in response:
                print(chunk)

        langfuse_client.flush()
        await asyncio.sleep(2)

        # get trace with _unique_trace_name
        trace = langfuse_client.get_generations(trace_id=_unique_trace_name)

        print("trace_from_langfuse", trace)

        _trace_data = trace.data

        assert _trace_data[0].input == {
            "messages": [{"content": "redacted-by-litellm", "role": "user"}]
        }
        assert _trace_data[0].output == {
            "role": "assistant",
            "content": "redacted-by-litellm",
        }

    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")


@pytest.mark.asyncio
async def test_langfuse_masked_input_output(langfuse_client):
    """
    Test that creates a trace with masked input and output
    """
    import uuid

    for mask_value in [True, False]:
        _unique_trace_name = f"litellm-test-{str(uuid.uuid4())}"
        litellm.set_verbose = True
        litellm.success_callback = ["langfuse"]
        response = await create_async_task(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "This is a test"}],
            metadata={
                "trace_id": _unique_trace_name,
                "mask_input": mask_value,
                "mask_output": mask_value,
            },
            mock_response="This is a test response",
        )
        print(response)
        expected_input = (
            "redacted-by-litellm"
            if mask_value
            else {"messages": [{"content": "This is a test", "role": "user"}]}
        )
        expected_output = (
            "redacted-by-litellm"
            if mask_value
            else {"content": "This is a test response", "role": "assistant"}
        )
        langfuse_client.flush()
        await asyncio.sleep(2)

        # get trace with _unique_trace_name
        trace = langfuse_client.get_trace(id=_unique_trace_name)
        generations = list(
            reversed(langfuse_client.get_generations(trace_id=_unique_trace_name).data)
        )

        assert trace.input == expected_input
        assert trace.output == expected_output
        assert generations[0].input == expected_input
        assert generations[0].output == expected_output


@pytest.mark.asyncio
async def test_alangfuse_logging_metadata(langfuse_client):
    """
    Test that creates multiple traces, with a varying number of generations and sets various metadata fields
    Confirms that no metadata that is standard within Langfuse is duplicated in the respective trace or generation metadata
    For trace continuation certain metadata of the trace is overridden with metadata from the last generation based on the update_trace_keys field
    Version is set for both the trace and the generation
    Release is just set for the trace
    Tags is just set for the trace
    """
    import uuid

    litellm.set_verbose = True
    litellm.success_callback = ["langfuse"]

    trace_identifiers = {}
    expected_filtered_metadata_keys = {
        "trace_name",
        "trace_id",
        "existing_trace_id",
        "trace_user_id",
        "session_id",
        "tags",
        "generation_name",
        "generation_id",
        "prompt",
    }
    trace_metadata = {
        "trace_actual_metadata_key": "trace_actual_metadata_value"
    }  # Allows for setting the metadata on the trace
    run_id = str(uuid.uuid4())
    session_id = f"litellm-test-session-{run_id}"
    trace_common_metadata = {
        "session_id": session_id,
        "tags": ["litellm-test-tag1", "litellm-test-tag2"],
        "update_trace_keys": [
            "output",
            "trace_metadata",
        ],  # Overwrite the following fields in the trace with the last generation's output and the trace_user_id
        "trace_metadata": trace_metadata,
        "gen_metadata_key": "gen_metadata_value",  # Metadata key that should not be filtered in the generation
        "trace_release": "litellm-test-release",
        "version": "litellm-test-version",
    }
    for trace_num in range(1, 3):  # Two traces
        metadata = copy.deepcopy(trace_common_metadata)
        trace_id = f"litellm-test-trace{trace_num}-{run_id}"
        metadata["trace_id"] = trace_id
        metadata["trace_name"] = trace_id
        trace_identifiers[trace_id] = []
        print(f"Trace: {trace_id}")
        for generation_num in range(
            1, trace_num + 1
        ):  # Each trace has a number of generations equal to its trace number
            metadata["trace_user_id"] = f"litellm-test-user{generation_num}-{run_id}"
            generation_id = (
                f"litellm-test-trace{trace_num}-generation-{generation_num}-{run_id}"
            )
            metadata["generation_id"] = generation_id
            metadata["generation_name"] = generation_id
            metadata["trace_metadata"][
                "generation_id"
            ] = generation_id  # Update to test if trace_metadata is overwritten by update trace keys
            trace_identifiers[trace_id].append(generation_id)
            print(f"Generation: {generation_id}")
            response = await create_async_task(
                model="gpt-3.5-turbo",
                mock_response=f"{session_id}:{trace_id}:{generation_id}",
                messages=[
                    {
                        "role": "user",
                        "content": f"{session_id}:{trace_id}:{generation_id}",
                    }
                ],
                max_tokens=100,
                temperature=0.2,
                metadata=copy.deepcopy(
                    metadata
                ),  # Every generation needs its own metadata, langfuse is not async/thread safe without it
            )
            print(response)
            metadata["existing_trace_id"] = trace_id

    langfuse_client.flush()
    await asyncio.sleep(10)

    # Tests the metadata filtering and the override of the output to be the last generation
    for trace_id, generation_ids in trace_identifiers.items():
        trace = langfuse_client.get_trace(id=trace_id)
        assert trace.id == trace_id
        assert trace.session_id == session_id
        assert trace.metadata != trace_metadata
        generations = list(
            reversed(langfuse_client.get_generations(trace_id=trace_id).data)
        )
        assert len(generations) == len(generation_ids)
        assert (
            trace.input == generations[0].input
        )  # Should be set by the first generation
        assert (
            trace.output == generations[-1].output
        )  # Should be overwritten by the last generation according to update_trace_keys
        assert (
            trace.metadata != generations[-1].metadata
        )  # Should be overwritten by the last generation according to update_trace_keys
        assert trace.metadata["generation_id"] == generations[-1].id
        assert set(trace.tags).issuperset(trace_common_metadata["tags"])
        print("trace_from_langfuse", trace)
        for generation_id, generation in zip(generation_ids, generations):
            assert generation.id == generation_id
            assert generation.trace_id == trace_id
            print(
                "common keys in trace",
                set(generation.metadata.keys()).intersection(
                    expected_filtered_metadata_keys
                ),
            )

            assert set(generation.metadata.keys()).isdisjoint(
                expected_filtered_metadata_keys
            )
            print("generation_from_langfuse", generation)


@pytest.mark.skip(reason="beta test - checking langfuse output")
def test_langfuse_logging():
    try:
        pre_langfuse_setup()
        litellm.set_verbose = True
        response = completion(
            model="claude-instant-1.2",
            messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}],
            max_tokens=10,
            temperature=0.2,
        )
        print(response)
        # time.sleep(5)
        # # check langfuse.log to see if there was a failed response
        # search_logs("langfuse.log")

    except litellm.Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")


# test_langfuse_logging()


@pytest.mark.skip(reason="beta test - checking langfuse output")
def test_langfuse_logging_stream():
    try:
        litellm.set_verbose = True
        response = completion(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "user",
                    "content": "this is a streaming test for llama2 + langfuse",
                }
            ],
            max_tokens=20,
            temperature=0.2,
            stream=True,
        )
        print(response)
        for chunk in response:
            pass
            # print(chunk)
    except litellm.Timeout as e:
        pass
    except Exception as e:
        print(e)


# test_langfuse_logging_stream()


@pytest.mark.skip(reason="beta test - checking langfuse output")
def test_langfuse_logging_custom_generation_name():
    try:
        litellm.set_verbose = True
        response = completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}],
            max_tokens=10,
            metadata={
                "langfuse/foo": "bar",
                "langsmith/fizz": "buzz",
                "prompt_hash": "asdf98u0j9131123",
                "generation_name": "ishaan-test-generation",
                "generation_id": "gen-id22",
                "trace_id": "trace-id22",
                "trace_user_id": "user-id2",
            },
        )
        print(response)
    except litellm.Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")
        print(e)


# test_langfuse_logging_custom_generation_name()


@pytest.mark.skip(reason="beta test - checking langfuse output")
def test_langfuse_logging_embedding():
    try:
        litellm.set_verbose = True
        litellm.success_callback = ["langfuse"]
        response = litellm.embedding(
            model="text-embedding-ada-002",
            input=["gm", "ishaan"],
        )
        print(response)
    except litellm.Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")
        print(e)


@pytest.mark.skip(reason="beta test - checking langfuse output")
def test_langfuse_logging_function_calling():
    litellm.set_verbose = True
    function1 = [
        {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        }
    ]
    try:
        response = completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "what's the weather in boston"}],
            temperature=0.1,
            functions=function1,
        )
        print(response)
    except litellm.Timeout as e:
        pass
    except Exception as e:
        print(e)


# test_langfuse_logging_function_calling()


@pytest.mark.skip(reason="Need to address this on main")
def test_aaalangfuse_existing_trace_id():
    """
    When existing trace id is passed, don't set trace params -> prevents overwriting the trace

    Pass 1 logging object with a trace

    Pass 2nd logging object with the trace id

    Assert no changes to the trace
    """
    # Test - if the logs were sent to the correct team on langfuse
    import datetime

    import litellm
    from litellm.integrations.langfuse import LangFuseLogger

    langfuse_Logger = LangFuseLogger(
        langfuse_public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
        langfuse_secret=os.getenv("LANGFUSE_PROJECT2_SECRET"),
    )
    litellm.success_callback = ["langfuse"]

    # langfuse_args = {'kwargs': { 'start_time':  'end_time': datetime.datetime(2024, 5, 1, 7, 31, 29, 903685), 'user_id': None, 'print_verbose': <function print_verbose at 0x109d1f420>, 'level': 'DEFAULT', 'status_message': None}
    response_obj = litellm.ModelResponse(
        id="chatcmpl-9K5HUAbVRqFrMZKXL0WoC295xhguY",
        choices=[
            litellm.Choices(
                finish_reason="stop",
                index=0,
                message=litellm.Message(
                    content="I'm sorry, I am an AI assistant and do not have real-time information. I recommend checking a reliable weather website or app for the most up-to-date weather information in Boston.",
                    role="assistant",
                ),
            )
        ],
        created=1714573888,
        model="gpt-3.5-turbo-0125",
        object="chat.completion",
        system_fingerprint="fp_3b956da36b",
        usage=litellm.Usage(completion_tokens=37, prompt_tokens=14, total_tokens=51),
    )

    ### NEW TRACE ###
    message = [{"role": "user", "content": "what's the weather in boston"}]
    langfuse_args = {
        "response_obj": response_obj,
        "kwargs": {
            "model": "gpt-3.5-turbo",
            "litellm_params": {
                "acompletion": False,
                "api_key": None,
                "force_timeout": 600,
                "logger_fn": None,
                "verbose": False,
                "custom_llm_provider": "openai",
                "api_base": "https://api.openai.com/v1/",
                "litellm_call_id": None,
                "model_alias_map": {},
                "completion_call_id": None,
                "metadata": None,
                "model_info": None,
                "proxy_server_request": None,
                "preset_cache_key": None,
                "no-log": False,
                "stream_response": {},
            },
            "messages": message,
            "optional_params": {"temperature": 0.1, "extra_body": {}},
            "start_time": "2024-05-01 07:31:27.986164",
            "stream": False,
            "user": None,
            "call_type": "completion",
            "litellm_call_id": None,
            "completion_start_time": "2024-05-01 07:31:29.903685",
            "temperature": 0.1,
            "extra_body": {},
            "input": [{"role": "user", "content": "what's the weather in boston"}],
            "api_key": "my-api-key",
            "additional_args": {
                "complete_input_dict": {
                    "model": "gpt-3.5-turbo",
                    "messages": [
                        {"role": "user", "content": "what's the weather in boston"}
                    ],
                    "temperature": 0.1,
                    "extra_body": {},
                }
            },
            "log_event_type": "successful_api_call",
            "end_time": "2024-05-01 07:31:29.903685",
            "cache_hit": None,
            "response_cost": 6.25e-05,
        },
        "start_time": datetime.datetime(2024, 5, 1, 7, 31, 27, 986164),
        "end_time": datetime.datetime(2024, 5, 1, 7, 31, 29, 903685),
        "user_id": None,
        "print_verbose": litellm.print_verbose,
        "level": "DEFAULT",
        "status_message": None,
    }

    langfuse_response_object = langfuse_Logger.log_event(**langfuse_args)

    import langfuse

    langfuse_client = langfuse.Langfuse(
        public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
        secret_key=os.getenv("LANGFUSE_PROJECT2_SECRET"),
    )

    trace_id = langfuse_response_object["trace_id"]

    assert trace_id is not None

    langfuse_client.flush()

    time.sleep(2)

    print(langfuse_client.get_trace(id=trace_id))

    initial_langfuse_trace = langfuse_client.get_trace(id=trace_id)

    ### EXISTING TRACE ###

    new_metadata = {"existing_trace_id": trace_id}
    new_messages = [{"role": "user", "content": "What do you know?"}]
    new_response_obj = litellm.ModelResponse(
        id="chatcmpl-9K5HUAbVRqFrMZKXL0WoC295xhguY",
        choices=[
            litellm.Choices(
                finish_reason="stop",
                index=0,
                message=litellm.Message(
                    content="What do I know?",
                    role="assistant",
                ),
            )
        ],
        created=1714573888,
        model="gpt-3.5-turbo-0125",
        object="chat.completion",
        system_fingerprint="fp_3b956da36b",
        usage=litellm.Usage(completion_tokens=37, prompt_tokens=14, total_tokens=51),
    )
    langfuse_args = {
        "response_obj": new_response_obj,
        "kwargs": {
            "model": "gpt-3.5-turbo",
            "litellm_params": {
                "acompletion": False,
                "api_key": None,
                "force_timeout": 600,
                "logger_fn": None,
                "verbose": False,
                "custom_llm_provider": "openai",
                "api_base": "https://api.openai.com/v1/",
                "litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
                "model_alias_map": {},
                "completion_call_id": None,
                "metadata": new_metadata,
                "model_info": None,
                "proxy_server_request": None,
                "preset_cache_key": None,
                "no-log": False,
                "stream_response": {},
            },
            "messages": new_messages,
            "optional_params": {"temperature": 0.1, "extra_body": {}},
            "start_time": "2024-05-01 07:31:27.986164",
            "stream": False,
            "user": None,
            "call_type": "completion",
            "litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
            "completion_start_time": "2024-05-01 07:31:29.903685",
            "temperature": 0.1,
            "extra_body": {},
            "input": [{"role": "user", "content": "what's the weather in boston"}],
            "api_key": "my-api-key",
            "additional_args": {
                "complete_input_dict": {
                    "model": "gpt-3.5-turbo",
                    "messages": [
                        {"role": "user", "content": "what's the weather in boston"}
                    ],
                    "temperature": 0.1,
                    "extra_body": {},
                }
            },
            "log_event_type": "successful_api_call",
            "end_time": "2024-05-01 07:31:29.903685",
            "cache_hit": None,
            "response_cost": 6.25e-05,
        },
        "start_time": datetime.datetime(2024, 5, 1, 7, 31, 27, 986164),
        "end_time": datetime.datetime(2024, 5, 1, 7, 31, 29, 903685),
        "user_id": None,
        "print_verbose": litellm.print_verbose,
        "level": "DEFAULT",
        "status_message": None,
    }

    langfuse_response_object = langfuse_Logger.log_event(**langfuse_args)

    new_trace_id = langfuse_response_object["trace_id"]

    assert new_trace_id == trace_id

    langfuse_client.flush()

    time.sleep(2)

    print(langfuse_client.get_trace(id=trace_id))

    new_langfuse_trace = langfuse_client.get_trace(id=trace_id)

    initial_langfuse_trace_dict = dict(initial_langfuse_trace)
    initial_langfuse_trace_dict.pop("updatedAt")
    initial_langfuse_trace_dict.pop("timestamp")

    new_langfuse_trace_dict = dict(new_langfuse_trace)
    new_langfuse_trace_dict.pop("updatedAt")
    new_langfuse_trace_dict.pop("timestamp")

    assert initial_langfuse_trace_dict == new_langfuse_trace_dict


@pytest.mark.skipif(
    condition=not os.environ.get("OPENAI_API_KEY", False),
    reason="Authentication missing for openai",
)
def test_langfuse_logging_tool_calling():
    litellm.set_verbose = True

    def get_current_weather(location, unit="fahrenheit"):
        """Get the current weather in a given location"""
        if "tokyo" in location.lower():
            return json.dumps(
                {"location": "Tokyo", "temperature": "10", "unit": "celsius"}
            )
        elif "san francisco" in location.lower():
            return json.dumps(
                {"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}
            )
        elif "paris" in location.lower():
            return json.dumps(
                {"location": "Paris", "temperature": "22", "unit": "celsius"}
            )
        else:
            return json.dumps({"location": location, "temperature": "unknown"})

    messages = [
        {
            "role": "user",
            "content": "What's the weather like in San Francisco, Tokyo, and Paris?",
        }
    ]
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]

    response = litellm.completion(
        model="gpt-3.5-turbo-1106",
        messages=messages,
        tools=tools,
        tool_choice="auto",  # auto is default, but we'll be explicit
    )
    print("\nLLM Response1:\n", response)
    response_message = response.choices[0].message
    tool_calls = response.choices[0].message.tool_calls


# test_langfuse_logging_tool_calling()


def get_langfuse_prompt(name: str):
    import langfuse
    from langfuse import Langfuse

    try:
        langfuse = Langfuse(
            public_key=os.environ["LANGFUSE_DEV_PUBLIC_KEY"],
            secret_key=os.environ["LANGFUSE_DEV_SK_KEY"],
            host=os.environ["LANGFUSE_HOST"],
        )

        # Get current production version of a text prompt
        prompt = langfuse.get_prompt(name=name)
        return prompt
    except Exception as e:
        raise Exception(f"Error getting prompt: {e}")


@pytest.mark.asyncio
@pytest.mark.skip(
    reason="local only test, use this to verify if we can send request to litellm proxy server"
)
async def test_make_request():
    response = await litellm.acompletion(
        model="openai/llama3",
        api_key="sk-1234",
        base_url="http://localhost:4000",
        messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}],
        extra_body={
            "metadata": {
                "tags": ["openai"],
                "prompt": get_langfuse_prompt("test-chat"),
            }
        },
    )
litellm/tests/test_dynamic_rate_limit_handler.py (new file, +486 lines)
@@ -0,0 +1,486 @@
# What is this?
## Unit tests for 'dynamic_rate_limiter.py`
import asyncio
import os
import random
import sys
import time
import traceback
import uuid
from datetime import datetime
from typing import Optional, Tuple

from dotenv import load_dotenv

load_dotenv()
import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest

import litellm
from litellm import DualCache, Router
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.hooks.dynamic_rate_limiter import (
    _PROXY_DynamicRateLimitHandler as DynamicRateLimitHandler,
)

"""
Basic test cases:

- If 1 'active' project => give all tpm
- If 2 'active' projects => divide tpm in 2
"""

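# --- Editorial sketch (not part of the diff; illustrative only) ---
# The tests below assume the dynamic rate limiter splits a deployment's TPM
# evenly across whichever projects were active in the current minute. A
# minimal, hypothetical model of that allocation rule:
def _expected_available_tpm(model_tpm: int, num_active_projects: int) -> int:
    # evenly divide the model's TPM across active projects (integer truncation,
    # matching the int() used in the assertions below)
    return int(model_tpm / max(num_active_projects, 1))


assert _expected_available_tpm(100, 1) == 100  # a lone project gets the full quota
assert _expected_available_tpm(100, 2) == 50  # two projects split it 50/50
assert _expected_available_tpm(100, 100) == 1
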
@pytest.fixture
def dynamic_rate_limit_handler() -> DynamicRateLimitHandler:
    internal_cache = DualCache()
    return DynamicRateLimitHandler(internal_usage_cache=internal_cache)


@pytest.fixture
def mock_response() -> litellm.ModelResponse:
    return litellm.ModelResponse(
        **{
            "id": "chatcmpl-abc123",
            "object": "chat.completion",
            "created": 1699896916,
            "model": "gpt-3.5-turbo-0125",
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": None,
                        "tool_calls": [
                            {
                                "id": "call_abc123",
                                "type": "function",
                                "function": {
                                    "name": "get_current_weather",
                                    "arguments": '{\n"location": "Boston, MA"\n}',
                                },
                            }
                        ],
                    },
                    "logprobs": None,
                    "finish_reason": "tool_calls",
                }
            ],
            "usage": {"prompt_tokens": 5, "completion_tokens": 5, "total_tokens": 10},
        }
    )


@pytest.fixture
def user_api_key_auth() -> UserAPIKeyAuth:
    return UserAPIKeyAuth()


@pytest.mark.parametrize("num_projects", [1, 2, 100])
@pytest.mark.asyncio
async def test_available_tpm(num_projects, dynamic_rate_limit_handler):
    model = "my-fake-model"
    ## SET CACHE W/ ACTIVE PROJECTS
    projects = [str(uuid.uuid4()) for _ in range(num_projects)]

    await dynamic_rate_limit_handler.internal_usage_cache.async_set_cache_sadd(
        model=model, value=projects
    )

    model_tpm = 100
    llm_router = Router(
        model_list=[
            {
                "model_name": model,
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "my-key",
                    "api_base": "my-base",
                    "tpm": model_tpm,
                },
            }
        ]
    )
    dynamic_rate_limit_handler.update_variables(llm_router=llm_router)

    ## CHECK AVAILABLE TPM PER PROJECT

    availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
        model=model
    )

    expected_availability = int(model_tpm / num_projects)

    assert availability == expected_availability


@pytest.mark.asyncio
async def test_rate_limit_raised(dynamic_rate_limit_handler, user_api_key_auth):
    """
    Unit test. Tests if rate limit error raised when quota exhausted.
    """
    from fastapi import HTTPException

    model = "my-fake-model"
    ## SET CACHE W/ ACTIVE PROJECTS
    projects = [str(uuid.uuid4())]

    await dynamic_rate_limit_handler.internal_usage_cache.async_set_cache_sadd(
        model=model, value=projects
    )

    model_tpm = 0
    llm_router = Router(
        model_list=[
            {
                "model_name": model,
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "my-key",
                    "api_base": "my-base",
                    "tpm": model_tpm,
                },
            }
        ]
    )
    dynamic_rate_limit_handler.update_variables(llm_router=llm_router)

    ## CHECK AVAILABLE TPM PER PROJECT

    availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
        model=model
    )

    expected_availability = int(model_tpm / 1)

    assert availability == expected_availability

    ## CHECK if exception raised

    try:
        await dynamic_rate_limit_handler.async_pre_call_hook(
            user_api_key_dict=user_api_key_auth,
            cache=DualCache(),
            data={"model": model},
            call_type="completion",
        )
        pytest.fail("Expected this to raise HTTPexception")
    except HTTPException as e:
        assert e.status_code == 429  # check if rate limit error raised
        pass


@pytest.mark.asyncio
async def test_base_case(dynamic_rate_limit_handler, mock_response):
    """
    If just 1 active project

    it should get all the quota

    - allow request to go through
    - update token usage
    - exhaust all tpm with just 1 project
    - assert ratelimiterror raised at 100%+1 tpm
    """
    model = "my-fake-model"
    ## model tpm - 50
    model_tpm = 50
    ## tpm per request - 10
    setattr(
        mock_response,
        "usage",
        litellm.Usage(prompt_tokens=5, completion_tokens=5, total_tokens=10),
    )

    llm_router = Router(
        model_list=[
            {
                "model_name": model,
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "my-key",
                    "api_base": "my-base",
                    "tpm": model_tpm,
                    "mock_response": mock_response,
                },
            }
        ]
    )
    dynamic_rate_limit_handler.update_variables(llm_router=llm_router)

    prev_availability: Optional[int] = None
    allowed_fails = 1
    for _ in range(5):
        try:
            # check availability
            availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
                model=model
            )

            ## assert availability updated
            if prev_availability is not None and availability is not None:
                assert availability == prev_availability - 10

            print(
                "prev_availability={}, availability={}".format(
                    prev_availability, availability
                )
            )

            prev_availability = availability

            # make call
            await llm_router.acompletion(
                model=model, messages=[{"role": "user", "content": "hey!"}]
            )

            await asyncio.sleep(3)
        except Exception:
            if allowed_fails > 0:
                allowed_fails -= 1
            else:
                raise


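# --- Editorial sketch (not part of the diff; illustrative only) ---
# Arithmetic behind test_base_case, under its stated assumptions: model_tpm=50
# and a mocked 10 total tokens per call for a single active project, so the
# reported availability should fall by 10 on every call until the quota is gone.
_model_tpm = 50
_tokens_per_call = 10
_availability_per_call = [_model_tpm - i * _tokens_per_call for i in range(6)]
assert _availability_per_call == [50, 40, 30, 20, 10, 0]
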
@pytest.mark.asyncio
async def test_update_cache(
    dynamic_rate_limit_handler, mock_response, user_api_key_auth
):
    """
    Check if active project correctly updated
    """
    model = "my-fake-model"
    model_tpm = 50

    llm_router = Router(
        model_list=[
            {
                "model_name": model,
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "my-key",
                    "api_base": "my-base",
                    "tpm": model_tpm,
                    "mock_response": mock_response,
                },
            }
        ]
    )
    dynamic_rate_limit_handler.update_variables(llm_router=llm_router)

    ## INITIAL ACTIVE PROJECTS - ASSERT NONE
    _, _, active_projects = await dynamic_rate_limit_handler.check_available_tpm(
        model=model
    )

    assert active_projects is None

    ## MAKE CALL
    await dynamic_rate_limit_handler.async_pre_call_hook(
        user_api_key_dict=user_api_key_auth,
        cache=DualCache(),
        data={"model": model},
        call_type="completion",
    )

    await asyncio.sleep(2)
    ## INITIAL ACTIVE PROJECTS - ASSERT 1
    _, _, active_projects = await dynamic_rate_limit_handler.check_available_tpm(
        model=model
    )

    assert active_projects == 1


@pytest.mark.parametrize("num_projects", [2])
@pytest.mark.asyncio
async def test_multiple_projects(
    dynamic_rate_limit_handler, mock_response, num_projects
):
    """
    If 2 active projects

    it should split 50% each

    - assert available tpm is 0 after 50%+1 tpm calls
    """
    model = "my-fake-model"
    model_tpm = 50
    total_tokens_per_call = 10
    step_tokens_per_call_per_project = total_tokens_per_call / num_projects

    available_tpm_per_project = int(model_tpm / num_projects)

    ## SET CACHE W/ ACTIVE PROJECTS
    projects = [str(uuid.uuid4()) for _ in range(num_projects)]
    await dynamic_rate_limit_handler.internal_usage_cache.async_set_cache_sadd(
        model=model, value=projects
    )

    expected_runs = int(available_tpm_per_project / step_tokens_per_call_per_project)

    setattr(
        mock_response,
        "usage",
        litellm.Usage(
            prompt_tokens=5, completion_tokens=5, total_tokens=total_tokens_per_call
        ),
    )

    llm_router = Router(
        model_list=[
            {
                "model_name": model,
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "my-key",
                    "api_base": "my-base",
                    "tpm": model_tpm,
                    "mock_response": mock_response,
                },
            }
        ]
    )
    dynamic_rate_limit_handler.update_variables(llm_router=llm_router)

    prev_availability: Optional[int] = None

    print("expected_runs: {}".format(expected_runs))
    for i in range(expected_runs + 1):
        # check availability
        availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
            model=model
        )

        ## assert availability updated
        if prev_availability is not None and availability is not None:
            assert (
                availability == prev_availability - step_tokens_per_call_per_project
            ), "Current Availability: Got={}, Expected={}, Step={}, Tokens per step={}, Initial model tpm={}".format(
                availability,
                prev_availability - 10,
                i,
                step_tokens_per_call_per_project,
                model_tpm,
            )

        print(
            "prev_availability={}, availability={}".format(
                prev_availability, availability
            )
        )

        prev_availability = availability

        # make call
        await llm_router.acompletion(
            model=model, messages=[{"role": "user", "content": "hey!"}]
        )

        await asyncio.sleep(3)

    # check availability
    availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
        model=model
    )
    assert availability == 0


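# --- Editorial sketch (not part of the diff; illustrative only) ---
# Worked example of the expected_runs math shared by test_multiple_projects and
# test_multiple_projects_e2e (assuming model_tpm=50, 2 projects, 10 tokens/call):
_model_tpm, _num_projects, _total_tokens_per_call = 50, 2, 10
_available_tpm_per_project = int(_model_tpm / _num_projects)  # 25
_step_tokens_per_call_per_project = _total_tokens_per_call / _num_projects  # 5.0
_expected_runs = int(_available_tpm_per_project / _step_tokens_per_call_per_project)
assert _expected_runs == 5  # after 5 calls (plus one final check), availability hits 0
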
@pytest.mark.parametrize("num_projects", [2])
@pytest.mark.asyncio
async def test_multiple_projects_e2e(
    dynamic_rate_limit_handler, mock_response, num_projects
):
    """
    2 parallel calls with different keys, same model

    If 2 active projects

    it should split 50% each

    - assert available tpm is 0 after 50%+1 tpm calls
    """
    model = "my-fake-model"
    model_tpm = 50
    total_tokens_per_call = 10
    step_tokens_per_call_per_project = total_tokens_per_call / num_projects

    available_tpm_per_project = int(model_tpm / num_projects)

    ## SET CACHE W/ ACTIVE PROJECTS
    projects = [str(uuid.uuid4()) for _ in range(num_projects)]
    await dynamic_rate_limit_handler.internal_usage_cache.async_set_cache_sadd(
        model=model, value=projects
    )

    expected_runs = int(available_tpm_per_project / step_tokens_per_call_per_project)

    setattr(
        mock_response,
        "usage",
        litellm.Usage(
            prompt_tokens=5, completion_tokens=5, total_tokens=total_tokens_per_call
        ),
    )

    llm_router = Router(
        model_list=[
            {
                "model_name": model,
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "my-key",
                    "api_base": "my-base",
                    "tpm": model_tpm,
                    "mock_response": mock_response,
                },
            }
        ]
    )
    dynamic_rate_limit_handler.update_variables(llm_router=llm_router)

    prev_availability: Optional[int] = None

    print("expected_runs: {}".format(expected_runs))
    for i in range(expected_runs + 1):
        # check availability
        availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
            model=model
        )

        ## assert availability updated
        if prev_availability is not None and availability is not None:
            assert (
                availability == prev_availability - step_tokens_per_call_per_project
            ), "Current Availability: Got={}, Expected={}, Step={}, Tokens per step={}, Initial model tpm={}".format(
                availability,
                prev_availability - 10,
                i,
                step_tokens_per_call_per_project,
                model_tpm,
            )

        print(
            "prev_availability={}, availability={}".format(
                prev_availability, availability
            )
        )

        prev_availability = availability

        # make call
        await llm_router.acompletion(
            model=model, messages=[{"role": "user", "content": "hey!"}]
        )

        await asyncio.sleep(3)

    # check availability
    availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
        model=model
    )
    assert availability == 0
litellm/tests/test_proxy_routes.py (new file, +52 lines)
@@ -0,0 +1,52 @@
import os
import sys

from dotenv import load_dotenv

load_dotenv()
import io
import os

# this file is to test litellm/proxy

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import asyncio
import logging

import pytest

import litellm
from litellm.proxy._types import LiteLLMRoutes
from litellm.proxy.proxy_server import router

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,  # Set the desired logging level
    format="%(asctime)s - %(levelname)s - %(message)s",
)


def test_routes_on_litellm_proxy():
    """
    Goal of this test: Test that we have all the critical OpenAI Routes on the Proxy server Fast API router

    this prevents accidentally deleting /threads, or /batches etc
    """
    _all_routes = []
    for route in router.routes:

        _path_as_str = str(route.path)
        if ":path" in _path_as_str:
            # remove the :path
            _path_as_str = _path_as_str.replace(":path", "")
        _all_routes.append(_path_as_str)

    print("ALL ROUTES on LiteLLM Proxy:", _all_routes)
    print("\n\n")
    print("ALL OPENAI ROUTES:", LiteLLMRoutes.openai_routes.value)

    for route in LiteLLMRoutes.openai_routes.value:
        assert route in _all_routes
@@ -1730,3 +1730,99 @@ async def test_router_text_completion_client():
        print(responses)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


@pytest.fixture
def mock_response() -> litellm.ModelResponse:
    return litellm.ModelResponse(
        **{
            "id": "chatcmpl-abc123",
            "object": "chat.completion",
            "created": 1699896916,
            "model": "gpt-3.5-turbo-0125",
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": None,
                        "tool_calls": [
                            {
                                "id": "call_abc123",
                                "type": "function",
                                "function": {
                                    "name": "get_current_weather",
                                    "arguments": '{\n"location": "Boston, MA"\n}',
                                },
                            }
                        ],
                    },
                    "logprobs": None,
                    "finish_reason": "tool_calls",
                }
            ],
            "usage": {"prompt_tokens": 5, "completion_tokens": 5, "total_tokens": 10},
        }
    )


@pytest.mark.asyncio
async def test_router_model_usage(mock_response):
    """
    Test if tracking used model tpm works as expected
    """
    model = "my-fake-model"
    model_tpm = 100
    setattr(
        mock_response,
        "usage",
        litellm.Usage(prompt_tokens=5, completion_tokens=5, total_tokens=10),
    )

    print(f"mock_response: {mock_response}")
    model_tpm = 100
    llm_router = Router(
        model_list=[
            {
                "model_name": model,
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "my-key",
                    "api_base": "my-base",
                    "tpm": model_tpm,
                    "mock_response": mock_response,
                },
            }
        ]
    )

    allowed_fails = 1  # allow for changing b/w minutes

    for _ in range(2):
        try:
            _ = await llm_router.acompletion(
                model=model, messages=[{"role": "user", "content": "Hey!"}]
            )
            await asyncio.sleep(3)

            initial_usage = await llm_router.get_model_group_usage(model_group=model)

            # completion call - 10 tokens
            _ = await llm_router.acompletion(
                model=model, messages=[{"role": "user", "content": "Hey!"}]
            )

            await asyncio.sleep(3)
            updated_usage = await llm_router.get_model_group_usage(model_group=model)

            assert updated_usage == initial_usage + 10  # type: ignore
            break
        except Exception as e:
            if allowed_fails > 0:
                print(
                    f"Decrementing allowed_fails: {allowed_fails}.\nReceived error - {str(e)}"
                )
                allowed_fails -= 1
            else:
                print(f"allowed_fails: {allowed_fails}")
                raise e

@@ -742,7 +742,9 @@ def test_completion_palm_stream():
 # test_completion_palm_stream()


-def test_completion_gemini_stream():
+@pytest.mark.parametrize("sync_mode", [False])  # True,
+@pytest.mark.asyncio
+async def test_completion_gemini_stream(sync_mode):
     try:
         litellm.set_verbose = True
         print("Streaming gemini response")

@@ -750,29 +752,58 @@ def test_completion_gemini_stream():
             {"role": "system", "content": "You are a helpful assistant."},
             {
                 "role": "user",
-                "content": "how does a court case get to the Supreme Court?",
+                "content": "Who was Alexander?",
             },
         ]
         print("testing gemini streaming")
-        response = completion(model="gemini/gemini-pro", messages=messages, stream=True)
-        print(f"type of response at the top: {response}")
         complete_response = ""
         # Add any assertions here to check the response
-        for idx, chunk in enumerate(response):
-            print(chunk)
-            # print(chunk.choices[0].delta)
-            chunk, finished = streaming_format_tests(idx, chunk)
-            if finished:
-                break
-            complete_response += chunk
+        non_empty_chunks = 0
+
+        if sync_mode:
+            response = completion(
+                model="gemini/gemini-1.5-flash",
+                messages=messages,
+                stream=True,
+            )
+
+            for idx, chunk in enumerate(response):
+                print(chunk)
+                # print(chunk.choices[0].delta)
+                chunk, finished = streaming_format_tests(idx, chunk)
+                if finished:
+                    break
+                non_empty_chunks += 1
+                complete_response += chunk
+        else:
+            response = await litellm.acompletion(
+                model="gemini/gemini-1.5-flash",
+                messages=messages,
+                stream=True,
+            )
+
+            idx = 0
+            async for chunk in response:
+                print(chunk)
+                # print(chunk.choices[0].delta)
+                chunk, finished = streaming_format_tests(idx, chunk)
+                if finished:
+                    break
+                non_empty_chunks += 1
+                complete_response += chunk
+                idx += 1
+
         if complete_response.strip() == "":
             raise Exception("Empty response received")
         print(f"completion_response: {complete_response}")
-    except litellm.APIError as e:
+        assert non_empty_chunks > 1
+    except litellm.InternalServerError as e:
+        pass
+    except litellm.RateLimitError as e:
         pass
     except Exception as e:
-        if "429 Resource has been exhausted":
-            return
+        # if "429 Resource has been exhausted":
+        #     return
         pytest.fail(f"Error occurred: {e}")

@@ -443,6 +443,8 @@ class ModelGroupInfo(BaseModel):
             "chat", "embedding", "completion", "image_generation", "audio_transcription"
         ]
     ] = Field(default="chat")
+    tpm: Optional[int] = None
+    rpm: Optional[int] = None
     supports_parallel_function_calling: bool = Field(default=False)
     supports_vision: bool = Field(default=False)
     supports_function_calling: bool = Field(default=False)

@@ -340,14 +340,15 @@ def function_setup(
     )
     try:
         global callback_list, add_breadcrumb, user_logger_fn, Logging

         function_id = kwargs["id"] if "id" in kwargs else None

         if len(litellm.callbacks) > 0:
             for callback in litellm.callbacks:
                 # check if callback is a string - e.g. "lago", "openmeter"
                 if isinstance(callback, str):
-                    callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(
-                        callback
+                    callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(  # type: ignore
+                        callback, internal_usage_cache=None, llm_router=None
                     )
                     if any(
                         isinstance(cb, type(callback))

@@ -3895,12 +3896,16 @@ def get_formatted_prompt(


 def get_response_string(response_obj: ModelResponse) -> str:
-    _choices: List[Choices] = response_obj.choices  # type: ignore
+    _choices: List[Union[Choices, StreamingChoices]] = response_obj.choices

     response_str = ""
     for choice in _choices:
-        if choice.message.content is not None:
-            response_str += choice.message.content
+        if isinstance(choice, Choices):
+            if choice.message.content is not None:
+                response_str += choice.message.content
+        elif isinstance(choice, StreamingChoices):
+            if choice.delta.content is not None:
+                response_str += choice.delta.content

     return response_str

@@ -9590,6 +9595,11 @@ class CustomStreamWrapper:
                     litellm.request_timeout
                 )
                 if self.logging_obj is not None:
+                    ## LOGGING
+                    threading.Thread(
+                        target=self.logging_obj.failure_handler,
+                        args=(e, traceback_exception),
+                    ).start()  # log response
                     # Handle any exceptions that might occur during streaming
                     asyncio.create_task(
                         self.logging_obj.async_failure_handler(e, traceback_exception)

@@ -9597,11 +9607,24 @@ class CustomStreamWrapper:
             raise e
         except Exception as e:
             traceback_exception = traceback.format_exc()
-            # Handle any exceptions that might occur during streaming
-            asyncio.create_task(
-                self.logging_obj.async_failure_handler(e, traceback_exception)  # type: ignore
+            if self.logging_obj is not None:
+                ## LOGGING
+                threading.Thread(
+                    target=self.logging_obj.failure_handler,
+                    args=(e, traceback_exception),
+                ).start()  # log response
+                # Handle any exceptions that might occur during streaming
+                asyncio.create_task(
+                    self.logging_obj.async_failure_handler(e, traceback_exception)  # type: ignore
+                )
+            ## Map to OpenAI Exception
+            raise exception_type(
+                model=self.model,
+                custom_llm_provider=self.custom_llm_provider,
+                original_exception=e,
+                completion_kwargs={},
+                extra_kwargs={},
             )
-            raise e


 class TextCompletionStreamWrapper:

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.40.23"
+version = "1.40.24"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"

@@ -90,7 +90,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.40.23"
+version = "1.40.24"
 version_files = [
     "pyproject.toml:^version"
 ]

File diff suppressed because one or more lines are too long
|
@ -1 +0,0 @@
|
||||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{61994:function(e,s,l){Promise.resolve().then(l.bind(l,667))},667:function(e,s,l){"use strict";l.r(s),l.d(s,{default:function(){return _}});var t=l(3827),a=l(64090),r=l(47907),n=l(16450),i=l(18190),o=l(13810),u=l(10384),c=l(46453),d=l(71801),m=l(52273),h=l(42440),x=l(30953),j=l(777),p=l(37963),f=l(60620),g=l(1861);function _(){let[e]=f.Z.useForm(),s=(0,r.useSearchParams)();s.get("token");let l=s.get("id"),[_,Z]=(0,a.useState)(null),[w,b]=(0,a.useState)(""),[N,S]=(0,a.useState)(""),[k,y]=(0,a.useState)(null),[v,E]=(0,a.useState)(""),[F,I]=(0,a.useState)("");return(0,a.useEffect)(()=>{l&&(0,j.W_)(l).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let l=e.token,t=(0,p.o)(l);I(l),console.log("decoded:",t),Z(t.key),console.log("decoded user email:",t.user_email),S(t.user_email),y(t.user_id)})},[l]),(0,t.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,t.jsxs)(o.Z,{children:[(0,t.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,t.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,t.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,t.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,t.jsxs)(c.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,t.jsx)(u.Z,{children:"SSO is under the Enterprise Tirer."}),(0,t.jsx)(u.Z,{children:(0,t.jsx)(n.Z,{variant:"primary",className:"mb-2",children:(0,t.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,t.jsxs)(f.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",F,"formValues:",e),_&&F&&(e.user_email=N,k&&l&&(0,j.m_)(_,l,k,e.password).then(e=>{var s;let l="/ui/";console.log("redirecting to:",l+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+F),window.location.href=l}))},children:[(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(f.Z.Item,{label:"Email Address",name:"user_email",children:(0,t.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,t.jsx)(f.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,t.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,t.jsx)("div",{className:"mt-10",children:(0,t.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,294,684,777,971,69,744],function(){return e(e.s=61994)}),_N_E=e.O()}]);
@@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{61994:function(e,s,t){Promise.resolve().then(t.bind(t,667))},667:function(e,s,t){"use strict";t.r(s),t.d(s,{default:function(){return _}});var l=t(3827),n=t(64090),a=t(47907),r=t(16450),i=t(18190),o=t(13810),c=t(10384),u=t(46453),d=t(71801),m=t(52273),h=t(42440),x=t(30953),p=t(777),f=t(37963),j=t(60620),g=t(1861);function _(){let[e]=j.Z.useForm(),s=(0,a.useSearchParams)();!function(e){console.log("COOKIES",document.cookie);let s=document.cookie.split("; ").find(s=>s.startsWith(e+"="));s&&s.split("=")[1]}("token");let t=s.get("id"),[_,Z]=(0,n.useState)(null),[k,w]=(0,n.useState)(""),[S,b]=(0,n.useState)(""),[N,y]=(0,n.useState)(null),[v,E]=(0,n.useState)(""),[I,O]=(0,n.useState)("");return(0,n.useEffect)(()=>{t&&(0,p.W_)(t).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let t=e.token,l=(0,f.o)(t);O(t),console.log("decoded:",l),Z(l.key),console.log("decoded user email:",l.user_email),b(l.user_email),y(l.user_id)})},[t]),(0,l.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,l.jsxs)(o.Z,{children:[(0,l.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,l.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,l.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,l.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,l.jsxs)(u.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,l.jsx)(c.Z,{children:"SSO is under the Enterprise Tirer."}),(0,l.jsx)(c.Z,{children:(0,l.jsx)(r.Z,{variant:"primary",className:"mb-2",children:(0,l.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,l.jsxs)(j.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",I,"formValues:",e),_&&I&&(e.user_email=S,N&&t&&(0,p.m_)(_,t,N,e.password).then(e=>{var s;let t="/ui/";console.log("redirecting to:",t+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+I),window.location.href=t}))},children:[(0,l.jsxs)(l.Fragment,{children:[(0,l.jsx)(j.Z.Item,{label:"Email Address",name:"user_email",children:(0,l.jsx)(m.Z,{type:"email",disabled:!0,value:S,defaultValue:S,className:"max-w-md"})}),(0,l.jsx)(j.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,l.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,l.jsx)("div",{className:"mt-10",children:(0,l.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,294,684,777,971,69,744],function(){return e(e.s=61994)}),_N_E=e.O()}]);
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-f76791513e294b30.js\",\"931\",\"static/chunks/app/page-5b9334558218205d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"dDWf4yi4zCe685SxgCnWX\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-f76791513e294b30.js\",\"931\",\"static/chunks/app/page-42b04008af7da690.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"DahySukItzAH9ZoOiMmQB\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[48951,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-5b9334558218205d.js"],""]
3:I[48951,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-42b04008af7da690.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["dDWf4yi4zCe685SxgCnWX",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["DahySukItzAH9ZoOiMmQB",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
File diff suppressed because one or more lines are too long
@@ -2,6 +2,6 @@
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-f76791513e294b30.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["dDWf4yi4zCe685SxgCnWX",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["DahySukItzAH9ZoOiMmQB",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
File diff suppressed because one or more lines are too long
@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-da04a591bae84617.js"],""]
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-fd30ae439831db99.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["dDWf4yi4zCe685SxgCnWX",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["DahySukItzAH9ZoOiMmQB",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@@ -20,10 +20,19 @@ import {
 } from "@/components/networking";
 import { jwtDecode } from "jwt-decode";
 import { Form, Button as Button2, message } from "antd";
+
+function getCookie(name: string) {
+  console.log("COOKIES", document.cookie)
+  const cookieValue = document.cookie
+    .split('; ')
+    .find(row => row.startsWith(name + '='));
+  return cookieValue ? cookieValue.split('=')[1] : null;
+}
+
 export default function Onboarding() {
   const [form] = Form.useForm();
   const searchParams = useSearchParams();
-  const token = searchParams.get("token");
+  const token = getCookie('token');
   const inviteID = searchParams.get("id");
   const [accessToken, setAccessToken] = useState<string | null>(null);
   const [defaultUserEmail, setDefaultUserEmail] = useState<string>("");
@@ -19,6 +19,15 @@ import CacheDashboard from "@/components/cache_dashboard";
 import { jwtDecode } from "jwt-decode";
 import { Typography } from "antd";

+function getCookie(name: string) {
+  console.log("COOKIES", document.cookie)
+  const cookieValue = document.cookie
+    .split('; ')
+    .find(row => row.startsWith(name + '='));
+  return cookieValue ? cookieValue.split('=')[1] : null;
+}
+
+
 function formatUserRole(userRole: string) {
   if (!userRole) {
     return "Undefined Role";
@@ -68,7 +77,7 @@ const CreateKeyPage = () => {
   const searchParams = useSearchParams();
   const [modelData, setModelData] = useState<any>({ data: [] });
   const userID = searchParams.get("userID");
-  const token = searchParams.get("token");
+  const token = getCookie('token');

   const [page, setPage] = useState("api-keys");
   const [accessToken, setAccessToken] = useState<string | null>(null);
@@ -24,6 +24,14 @@ type UserSpendData = {
   max_budget?: number | null;
 };

+function getCookie(name: string) {
+  console.log("COOKIES", document.cookie)
+  const cookieValue = document.cookie
+    .split('; ')
+    .find(row => row.startsWith(name + '='));
+  return cookieValue ? cookieValue.split('=')[1] : null;
+}
+
 interface UserDashboardProps {
   userID: string | null;
   userRole: string | null;
@@ -66,7 +74,8 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
   const viewSpend = searchParams.get("viewSpend");
   const router = useRouter();

-  const token = searchParams.get("token");
+  const token = getCookie('token');
+
   const [accessToken, setAccessToken] = useState<string | null>(null);
   const [teamSpend, setTeamSpend] = useState<number | null>(null);
   const [userModels, setUserModels] = useState<string[]>([]);
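The three UI source diffs above (the onboarding page, the create-key page, and the user dashboard) make the same change: the token is no longer read from the URL query string but from a `token` cookie via the `getCookie` helper added to each file. A minimal sketch of how that cookie-sourced token would be consumed, assuming the cookie carries the same JWT the pages previously took from the query string and that its payload exposes the `key`, `user_email`, and `user_id` claims these pages decode elsewhere:

```typescript
import { jwtDecode } from "jwt-decode";

// Same helper the diff adds to each page: return a cookie's raw value, or null.
function getCookie(name: string) {
  const cookieValue = document.cookie
    .split("; ")
    .find((row) => row.startsWith(name + "="));
  return cookieValue ? cookieValue.split("=")[1] : null;
}

// Assumed claim shape, inferred from the fields these pages read after decoding.
type TokenClaims = { key: string; user_email: string; user_id: string };

const token = getCookie("token");
if (token) {
  const decoded = jwtDecode<TokenClaims>(token);
  console.log("decoded user email:", decoded.user_email);
}
```

Note that `cookieValue.split("=")[1]` returns only the part of the value before its first `=`, which is fine for unpadded base64url JWTs but would truncate a cookie value that itself contains `=`.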