Merge branch 'main' into litellm_azure_content_filter_fallbacks

This commit is contained in:
Krish Dholakia 2024-06-22 21:28:29 -07:00 committed by GitHub
commit 0454c0781a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
51 changed files with 1650 additions and 1074 deletions

View file

@@ -152,3 +152,104 @@ litellm_remaining_team_budget_metric{team_alias="QA Prod Bot",team_id="de35b29e-
```
### Dynamic TPM Allocation
Prevent projects from gobbling too much quota.
Dynamically allocate TPM quota to API keys, based on the keys active in that minute.
1. Setup config.yaml
```yaml
model_list:
  - model_name: my-fake-model
    litellm_params:
      model: gpt-3.5-turbo
      api_key: my-fake-key
      mock_response: hello-world
      tpm: 60

litellm_settings:
  callbacks: ["dynamic_rate_limiter"]

general_settings:
  master_key: sk-1234 # OR set `LITELLM_MASTER_KEY=".."` in your .env
  database_url: postgres://.. # OR set `DATABASE_URL=".."` in your .env
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
```python
"""
- Run 2 concurrent teams calling same model
- model has 60 TPM
- Mock response returns 30 total tokens / request
- Each team will only be able to make 1 request per minute
"""
"""
- Run 2 concurrent teams calling same model
- model has 60 TPM
- Mock response returns 30 total tokens / request
- Each team will only be able to make 1 request per minute
"""
import requests
from openai import OpenAI, RateLimitError
def create_key(api_key: str, base_url: str):
response = requests.post(
url="{}/key/generate".format(base_url),
json={},
headers={
"Authorization": "Bearer {}".format(api_key)
}
)
_response = response.json()
return _response["key"]
key_1 = create_key(api_key="sk-1234", base_url="http://0.0.0.0:4000")
key_2 = create_key(api_key="sk-1234", base_url="http://0.0.0.0:4000")
# call proxy with key 1 - works
openai_client_1 = OpenAI(api_key=key_1, base_url="http://0.0.0.0:4000")
response = openai_client_1.chat.completions.with_raw_response.create(
model="my-fake-model", messages=[{"role": "user", "content": "Hello world!"}],
)
print("Headers for call 1 - {}".format(response.headers))
_response = response.parse()
print("Total tokens for call - {}".format(_response.usage.total_tokens))
# call proxy with key 2 - works
openai_client_2 = OpenAI(api_key=key_2, base_url="http://0.0.0.0:4000")
response = openai_client_2.chat.completions.with_raw_response.create(
model="my-fake-model", messages=[{"role": "user", "content": "Hello world!"}],
)
print("Headers for call 2 - {}".format(response.headers))
_response = response.parse()
print("Total tokens for call - {}".format(_response.usage.total_tokens))
# call proxy with key 2 - fails
try:
openai_client_2.chat.completions.with_raw_response.create(model="my-fake-model", messages=[{"role": "user", "content": "Hey, how's it going?"}])
raise Exception("This should have failed!")
except RateLimitError as e:
print("This was rate limited b/c - {}".format(str(e)))
```
**Expected Response**
```
This was rate limited b/c - Error code: 429 - {'error': {'message': {'error': 'Key=<hashed_token> over available TPM=0. Model TPM=0, Active keys=2'}, 'type': 'None', 'param': 'None', 'code': 429}}
```
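
For intuition, here is the arithmetic behind that 429. This is a sketch of the allocation rule described above, not the proxy's internal code:

```python
# Dynamic TPM allocation (illustrative): available TPM is split evenly
# across the keys that were active in the current minute.
model_tpm = 60
active_keys = 2
tpm_per_key = model_tpm // active_keys  # 30 TPM per key

tokens_used_by_key_2 = 30  # one mock response = 30 total tokens
remaining = tpm_per_key - tokens_used_by_key_2
print(remaining)  # 0 -> key 2's second request in this minute gets a 429
```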

View file

@@ -37,7 +37,9 @@ input_callback: List[Union[str, Callable]] = []
 success_callback: List[Union[str, Callable]] = []
 failure_callback: List[Union[str, Callable]] = []
 service_callback: List[Union[str, Callable]] = []
-_custom_logger_compatible_callbacks_literal = Literal["lago", "openmeter", "logfire"]
+_custom_logger_compatible_callbacks_literal = Literal[
+    "lago", "openmeter", "logfire", "dynamic_rate_limiter"
+]
 callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] = []
 _langfuse_default_tags: Optional[
     List[
@@ -735,6 +737,7 @@ from .utils import (
     client,
     exception_type,
     get_optional_params,
+    get_response_string,
     modify_integration,
     token_counter,
     create_pretrained_tokenizer,

View file

@@ -1,11 +1,12 @@
-from datetime import datetime
+from datetime import datetime, timedelta
+from typing import TYPE_CHECKING, Any, Optional, Union

 import litellm
 from litellm.proxy._types import UserAPIKeyAuth
-from .types.services import ServiceTypes, ServiceLoggerPayload
-from .integrations.prometheus_services import PrometheusServicesLogger
 from .integrations.custom_logger import CustomLogger
-from datetime import timedelta
-from typing import Union, Optional, TYPE_CHECKING, Any
+from .integrations.prometheus_services import PrometheusServicesLogger
+from .types.services import ServiceLoggerPayload, ServiceTypes

 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
@@ -53,8 +54,8 @@ class ServiceLogging(CustomLogger):
         call_type: str,
         duration: float,
         parent_otel_span: Optional[Span] = None,
-        start_time: Optional[datetime] = None,
-        end_time: Optional[datetime] = None,
+        start_time: Optional[Union[datetime, float]] = None,
+        end_time: Optional[Union[datetime, float]] = None,
     ):
         """
        - For counting if the redis, postgres call is successful
@@ -92,8 +93,8 @@ class ServiceLogging(CustomLogger):
         error: Union[str, Exception],
         call_type: str,
         parent_otel_span: Optional[Span] = None,
-        start_time: Optional[datetime] = None,
-        end_time: Optional[datetime] = None,
+        start_time: Optional[Union[datetime, float]] = None,
+        end_time: Optional[Union[float, datetime]] = None,
     ):
         """
        - For counting if the redis, postgres call is unsuccessful
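
Context for the widened `start_time`/`end_time` annotations: the cache code times itself with `time.time()`, which returns a `float` epoch timestamp, while other callers pass `datetime` objects, so the hooks now accept both. A quick illustration:

```python
import time
from datetime import datetime

start_as_float: float = time.time()     # what the Redis cache paths pass
start_as_dt: datetime = datetime.now()  # what other callers pass
# hence the new annotation: start_time: Optional[Union[datetime, float]]
```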

View file

@@ -7,14 +7,21 @@
 #
 # Thank you users! We ❤️ you! - Krrish & Ishaan
-import litellm
-import time, logging, asyncio
-import json, traceback, ast, hashlib
-from typing import Optional, Literal, List, Union, Any, BinaryIO
+import ast
+import asyncio
+import hashlib
+import json
+import logging
+import time
+import traceback
+from datetime import timedelta
+from typing import Any, BinaryIO, List, Literal, Optional, Union

 from openai._models import BaseModel as OpenAIObject

+import litellm
 from litellm._logging import verbose_logger
 from litellm.types.services import ServiceLoggerPayload, ServiceTypes
-import traceback

 def print_verbose(print_statement):
@@ -78,6 +85,17 @@ class InMemoryCache(BaseCache):
         else:
             self.set_cache(key=cache_key, value=cache_value)

+    async def async_set_cache_sadd(self, key, value: List, ttl: Optional[float]):
+        """
+        Add value to set
+        """
+        # get the value
+        init_value = self.get_cache(key=key) or set()
+        for val in value:
+            init_value.add(val)
+        self.set_cache(key, init_value, ttl=ttl)
+        return value
+
     def get_cache(self, key, **kwargs):
         if key in self.cache_dict:
             if key in self.ttl_dict:
@@ -147,10 +165,12 @@ class RedisCache(BaseCache):
         namespace: Optional[str] = None,
         **kwargs,
     ):
-        from ._redis import get_redis_client, get_redis_connection_pool
-        from litellm._service_logger import ServiceLogging
         import redis

+        from litellm._service_logger import ServiceLogging
+
+        from ._redis import get_redis_client, get_redis_connection_pool
+
         redis_kwargs = {}
         if host is not None:
             redis_kwargs["host"] = host
@@ -329,6 +349,7 @@ class RedisCache(BaseCache):
                     start_time=start_time,
                     end_time=end_time,
                     parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
+                    call_type="async_set_cache",
                 )
             )
             # NON blocking - notify users Redis is throwing an exception
@@ -448,6 +469,80 @@ class RedisCache(BaseCache):
                 cache_value,
             )

+    async def async_set_cache_sadd(
+        self, key, value: List, ttl: Optional[float], **kwargs
+    ):
+        start_time = time.time()
+        try:
+            _redis_client = self.init_async_client()
+        except Exception as e:
+            end_time = time.time()
+            _duration = end_time - start_time
+            asyncio.create_task(
+                self.service_logger_obj.async_service_failure_hook(
+                    service=ServiceTypes.REDIS,
+                    duration=_duration,
+                    error=e,
+                    start_time=start_time,
+                    end_time=end_time,
+                    parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
+                    call_type="async_set_cache_sadd",
+                )
+            )
+            # NON blocking - notify users Redis is throwing an exception
+            verbose_logger.error(
+                "LiteLLM Redis Caching: async set() - Got exception from REDIS %s, Writing value=%s",
+                str(e),
+                value,
+            )
+            raise e
+
+        key = self.check_and_fix_namespace(key=key)
+        async with _redis_client as redis_client:
+            print_verbose(
+                f"Set ASYNC Redis Cache: key: {key}\nValue {value}\nttl={ttl}"
+            )
+            try:
+                await redis_client.sadd(key, *value)
+                if ttl is not None:
+                    _td = timedelta(seconds=ttl)
+                    await redis_client.expire(key, _td)
+                print_verbose(
+                    f"Successfully Set ASYNC Redis Cache SADD: key: {key}\nValue {value}\nttl={ttl}"
+                )
+                end_time = time.time()
+                _duration = end_time - start_time
+                asyncio.create_task(
+                    self.service_logger_obj.async_service_success_hook(
+                        service=ServiceTypes.REDIS,
+                        duration=_duration,
+                        call_type="async_set_cache_sadd",
+                        start_time=start_time,
+                        end_time=end_time,
+                        parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
+                    )
+                )
+            except Exception as e:
+                end_time = time.time()
+                _duration = end_time - start_time
+                asyncio.create_task(
+                    self.service_logger_obj.async_service_failure_hook(
+                        service=ServiceTypes.REDIS,
+                        duration=_duration,
+                        error=e,
+                        call_type="async_set_cache_sadd",
+                        start_time=start_time,
+                        end_time=end_time,
+                        parent_otel_span=_get_parent_otel_span_from_kwargs(kwargs),
+                    )
+                )
+                # NON blocking - notify users Redis is throwing an exception
+                verbose_logger.error(
+                    "LiteLLM Redis Caching: async set_cache_sadd() - Got exception from REDIS %s, Writing value=%s",
+                    str(e),
+                    value,
+                )
+
     async def batch_cache_write(self, key, value, **kwargs):
         print_verbose(
             f"in batch cache writing for redis buffer size={len(self.redis_batch_writing_buffer)}",
@@ -886,11 +981,10 @@ class RedisSemanticCache(BaseCache):
     def get_cache(self, key, **kwargs):
         print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}")
-        from redisvl.query import VectorQuery
         import numpy as np
+        from redisvl.query import VectorQuery

         # query
         # get the messages
         messages = kwargs["messages"]
         prompt = "".join(message["content"] for message in messages)
@@ -943,7 +1037,8 @@ class RedisSemanticCache(BaseCache):
     async def async_set_cache(self, key, value, **kwargs):
         import numpy as np
-        from litellm.proxy.proxy_server import llm_router, llm_model_list
+
+        from litellm.proxy.proxy_server import llm_model_list, llm_router

         try:
             await self.index.acreate(overwrite=False)  # don't overwrite existing index
@@ -998,12 +1093,12 @@ class RedisSemanticCache(BaseCache):
     async def async_get_cache(self, key, **kwargs):
         print_verbose(f"async redis semantic-cache get_cache, kwargs: {kwargs}")
-        from redisvl.query import VectorQuery
         import numpy as np
-        from litellm.proxy.proxy_server import llm_router, llm_model_list
+        from redisvl.query import VectorQuery
+
+        from litellm.proxy.proxy_server import llm_model_list, llm_router

         # query
         # get the messages
         messages = kwargs["messages"]
         prompt = "".join(message["content"] for message in messages)
@@ -1161,7 +1256,8 @@ class S3Cache(BaseCache):
             self.set_cache(key=key, value=value, **kwargs)

     def get_cache(self, key, **kwargs):
-        import boto3, botocore
+        import boto3
+        import botocore

         try:
             key = self.key_prefix + key
@@ -1471,7 +1567,7 @@ class DualCache(BaseCache):
                     key, value, **kwargs
                 )

-            if self.redis_cache is not None and local_only == False:
+            if self.redis_cache is not None and local_only is False:
                 result = await self.redis_cache.async_increment(key, value, **kwargs)

             return result
@@ -1480,6 +1576,38 @@ class DualCache(BaseCache):
             verbose_logger.debug(traceback.format_exc())
             raise e

+    async def async_set_cache_sadd(
+        self, key, value: List, local_only: bool = False, **kwargs
+    ) -> None:
+        """
+        Add value to a set
+
+        Key - the key in cache
+        Value - str - the value you want to add to the set
+
+        Returns - None
+        """
+        try:
+            if self.in_memory_cache is not None:
+                _ = await self.in_memory_cache.async_set_cache_sadd(
+                    key, value, ttl=kwargs.get("ttl", None)
+                )
+
+            if self.redis_cache is not None and local_only is False:
+                _ = await self.redis_cache.async_set_cache_sadd(
+                    key, value, ttl=kwargs.get("ttl", None)
+                )
+
+            return None
+        except Exception as e:
+            verbose_logger.error(
+                "LiteLLM Cache: Exception async set_cache_sadd: {}\n{}".format(
+                    str(e), traceback.format_exc()
+                )
+            )
+            raise e
+
     def flush_cache(self):
         if self.in_memory_cache is not None:
             self.in_memory_cache.flush_cache()
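
The new `async_set_cache_sadd` writes set members to the in-memory cache first and, unless `local_only=True`, mirrors the write to Redis. A minimal usage sketch, assuming `DualCache()` with no arguments falls back to an in-memory cache only:

```python
import asyncio

from litellm.caching import DualCache


async def main():
    cache = DualCache()  # no redis_cache configured -> in-memory only
    # add two members to the set under "active_keys", expiring after 60s
    await cache.async_set_cache_sadd(
        key="active_keys", value=["key_1", "key_2"], local_only=True, ttl=60
    )
    print(cache.in_memory_cache.get_cache("active_keys"))  # {'key_1', 'key_2'}


asyncio.run(main())
```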

View file

@@ -105,8 +105,8 @@ class OpenTelemetry(CustomLogger):
         self,
         payload: ServiceLoggerPayload,
         parent_otel_span: Optional[Span] = None,
-        start_time: Optional[datetime] = None,
-        end_time: Optional[datetime] = None,
+        start_time: Optional[Union[datetime, float]] = None,
+        end_time: Optional[Union[datetime, float]] = None,
     ):
         from datetime import datetime
@@ -144,8 +144,8 @@ class OpenTelemetry(CustomLogger):
         self,
         payload: ServiceLoggerPayload,
         parent_otel_span: Optional[Span] = None,
-        start_time: Optional[datetime] = None,
-        end_time: Optional[datetime] = None,
+        start_time: Optional[Union[datetime, float]] = None,
+        end_time: Optional[Union[float, datetime]] = None,
     ):
         from datetime import datetime

View file

@@ -19,7 +19,8 @@ from litellm import (
     turn_off_message_logging,
     verbose_logger,
 )
-from litellm.caching import InMemoryCache, S3Cache
+from litellm.caching import InMemoryCache, S3Cache, DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.redact_messages import (
     redact_message_input_output_from_logging,
@@ -1899,7 +1900,11 @@ def set_callbacks(callback_list, function_id=None):
 def _init_custom_logger_compatible_class(
     logging_integration: litellm._custom_logger_compatible_callbacks_literal,
-) -> Callable:
+    internal_usage_cache: Optional[DualCache],
+    llm_router: Optional[
+        Any
+    ],  # expect litellm.Router, but typing errors due to circular import
+) -> CustomLogger:
     if logging_integration == "lago":
         for callback in _in_memory_loggers:
             if isinstance(callback, LagoLogger):
@@ -1935,3 +1940,58 @@ def _init_custom_logger_compatible_class(
         _otel_logger = OpenTelemetry(config=otel_config)
         _in_memory_loggers.append(_otel_logger)
         return _otel_logger  # type: ignore
+    elif logging_integration == "dynamic_rate_limiter":
+        from litellm.proxy.hooks.dynamic_rate_limiter import (
+            _PROXY_DynamicRateLimitHandler,
+        )
+
+        for callback in _in_memory_loggers:
+            if isinstance(callback, _PROXY_DynamicRateLimitHandler):
+                return callback  # type: ignore
+
+        if internal_usage_cache is None:
+            raise Exception(
+                "Internal Error: Cache cannot be empty - internal_usage_cache={}".format(
+                    internal_usage_cache
+                )
+            )
+
+        dynamic_rate_limiter_obj = _PROXY_DynamicRateLimitHandler(
+            internal_usage_cache=internal_usage_cache
+        )
+
+        if llm_router is not None and isinstance(llm_router, litellm.Router):
+            dynamic_rate_limiter_obj.update_variables(llm_router=llm_router)
+        _in_memory_loggers.append(dynamic_rate_limiter_obj)
+        return dynamic_rate_limiter_obj  # type: ignore
+
+
+def get_custom_logger_compatible_class(
+    logging_integration: litellm._custom_logger_compatible_callbacks_literal,
+) -> Optional[CustomLogger]:
+    if logging_integration == "lago":
+        for callback in _in_memory_loggers:
+            if isinstance(callback, LagoLogger):
+                return callback
+    elif logging_integration == "openmeter":
+        for callback in _in_memory_loggers:
+            if isinstance(callback, OpenMeterLogger):
+                return callback
+    elif logging_integration == "logfire":
+        if "LOGFIRE_TOKEN" not in os.environ:
+            raise ValueError("LOGFIRE_TOKEN not found in environment variables")
+        from litellm.integrations.opentelemetry import OpenTelemetry
+
+        for callback in _in_memory_loggers:
+            if isinstance(callback, OpenTelemetry):
+                return callback  # type: ignore
+    elif logging_integration == "dynamic_rate_limiter":
+        from litellm.proxy.hooks.dynamic_rate_limiter import (
+            _PROXY_DynamicRateLimitHandler,
+        )
+
+        for callback in _in_memory_loggers:
+            if isinstance(callback, _PROXY_DynamicRateLimitHandler):
+                return callback  # type: ignore
+    return None
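
The net effect: a `callbacks: ["dynamic_rate_limiter"]` entry in the config is resolved through this factory into a process-wide singleton hook. A hedged sketch of that resolution (simplified from the proxy's startup wiring; requires the proxy extras to be installed):

```python
from litellm.caching import DualCache
from litellm.litellm_core_utils.litellm_logging import (
    _init_custom_logger_compatible_class,
)

internal_usage_cache = DualCache()
handler = _init_custom_logger_compatible_class(
    "dynamic_rate_limiter",
    internal_usage_cache=internal_usage_cache,
    llm_router=None,  # a litellm.Router can be attached later via update_variables()
)
# a second lookup returns the same in-memory singleton
assert handler is _init_custom_logger_compatible_class(
    "dynamic_rate_limiter", internal_usage_cache, None
)
```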

View file

@@ -1,63 +1,64 @@
 # What is this?
 ## Initial implementation of calling bedrock via httpx client (allows for async calls).
 ## V1 - covers cohere + anthropic claude-3 support
-from functools import partial
-import os, types
+import copy
 import json
-from enum import Enum
-import requests, copy  # type: ignore
+import os
 import time
+import types
+import urllib.parse
+import uuid
+from enum import Enum
+from functools import partial
 from typing import (
-    Callable,
-    Optional,
-    List,
-    Literal,
-    Union,
-    Any,
-    TypedDict,
-    Tuple,
-    Iterator,
-    AsyncIterator,
-)
-from litellm.utils import (
-    ModelResponse,
-    Usage,
-    CustomStreamWrapper,
-    get_secret,
+    Any,
+    AsyncIterator,
+    Callable,
+    Iterator,
+    List,
+    Literal,
+    Optional,
+    Tuple,
+    TypedDict,
+    Union,
 )
+
+import httpx  # type: ignore
+import requests  # type: ignore
+
+import litellm
+from litellm.caching import DualCache
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.litellm_core_utils.litellm_logging import Logging
-from litellm.types.utils import Message, Choices
-import litellm, uuid
-from .prompt_templates.factory import (
-    prompt_factory,
-    custom_prompt,
-    cohere_message_pt,
-    construct_tool_use_system_prompt,
-    extract_between_tags,
-    parse_xml_params,
-    contains_tag,
-    _bedrock_converse_messages_pt,
-    _bedrock_tools_pt,
-)
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
     _get_async_httpx_client,
     _get_httpx_client,
 )
-from .base import BaseLLM
-import httpx  # type: ignore
-from .bedrock import BedrockError, convert_messages_to_prompt, ModelResponseIterator
 from litellm.types.llms.bedrock import *
-import urllib.parse
 from litellm.types.llms.openai import (
+    ChatCompletionDeltaChunk,
     ChatCompletionResponseMessage,
     ChatCompletionToolCallChunk,
     ChatCompletionToolCallFunctionChunk,
-    ChatCompletionDeltaChunk,
 )
-from litellm.caching import DualCache
+from litellm.types.utils import Choices, Message
+from litellm.utils import CustomStreamWrapper, ModelResponse, Usage, get_secret
+
+from .base import BaseLLM
+from .bedrock import BedrockError, ModelResponseIterator, convert_messages_to_prompt
+from .prompt_templates.factory import (
+    _bedrock_converse_messages_pt,
+    _bedrock_tools_pt,
+    cohere_message_pt,
+    construct_tool_use_system_prompt,
+    contains_tag,
+    custom_prompt,
+    extract_between_tags,
+    parse_xml_params,
+    prompt_factory,
+)

 iam_cache = DualCache()
@@ -171,6 +172,7 @@ async def make_call(
     messages: list,
     logging_obj,
 ):
+    try:
         if client is None:
             client = _get_async_httpx_client()  # Create a new client if none provided
@@ -191,6 +193,13 @@ async def make_call(
         )

         return completion_stream
+    except httpx.HTTPStatusError as err:
+        error_code = err.response.status_code
+        raise BedrockError(status_code=error_code, message=str(err))
+    except httpx.TimeoutException as e:
+        raise BedrockError(status_code=408, message="Timeout error occurred.")
+    except Exception as e:
+        raise BedrockError(status_code=500, message=str(e))

 def make_sync_call(
@@ -704,7 +713,6 @@ class BedrockLLM(BaseLLM):
     ) -> Union[ModelResponse, CustomStreamWrapper]:
         try:
             import boto3
-
             from botocore.auth import SigV4Auth
             from botocore.awsrequest import AWSRequest
             from botocore.credentials import Credentials
@@ -1650,7 +1658,6 @@ class BedrockConverseLLM(BaseLLM):
     ):
         try:
             import boto3
-
             from botocore.auth import SigV4Auth
             from botocore.awsrequest import AWSRequest
             from botocore.credentials import Credentials
@@ -1904,8 +1911,8 @@ class BedrockConverseLLM(BaseLLM):
 def get_response_stream_shape():
-    from botocore.model import ServiceModel
     from botocore.loaders import Loader
+    from botocore.model import ServiceModel

     loader = Loader()
     bedrock_service_dict = loader.load_service_model("bedrock-runtime", "service-2")
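
The new wrapper in `make_call` maps transport-level failures onto `BedrockError` with a meaningful status code: the upstream HTTP status when there is one, 408 for timeouts, 500 otherwise. The same mapping in isolation, as a generic sketch (`ProviderError` is a hypothetical stand-in for `BedrockError`):

```python
import httpx


class ProviderError(Exception):  # hypothetical stand-in for BedrockError
    def __init__(self, status_code: int, message: str):
        self.status_code = status_code
        super().__init__(message)


def classify(err: Exception) -> ProviderError:
    # Mirror the diff's mapping: HTTP status -> that status, timeout -> 408, else 500.
    if isinstance(err, httpx.HTTPStatusError):
        return ProviderError(err.response.status_code, str(err))
    if isinstance(err, httpx.TimeoutException):
        return ProviderError(408, "Timeout error occurred.")
    return ProviderError(500, str(err))
```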

View file

@@ -1218,6 +1218,7 @@ class ModelResponseIterator:
     def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
         try:
             processed_chunk = GenerateContentResponseBody(**chunk)  # type: ignore
+
             text = ""
             tool_use: Optional[ChatCompletionToolCallChunk] = None
             is_finished = False
@@ -1236,7 +1237,8 @@ class ModelResponseIterator:
                 finish_reason = map_finish_reason(
                     finish_reason=gemini_chunk["finishReason"]
                 )
-                is_finished = True
+                ## DO NOT SET 'finish_reason' = True
+                ## GEMINI SETS FINISHREASON ON EVERY CHUNK!

             if "usageMetadata" in processed_chunk:
                 usage = ChatCompletionUsageBlock(
@@ -1250,7 +1252,7 @@ class ModelResponseIterator:
             returned_chunk = GenericStreamingChunk(
                 text=text,
                 tool_use=tool_use,
-                is_finished=is_finished,
+                is_finished=False,
                 finish_reason=finish_reason,
                 usage=usage,
                 index=0,
@@ -1268,9 +1270,8 @@ class ModelResponseIterator:
             chunk = self.response_iterator.__next__()
             self.coro.send(chunk)
             if self.events:
-                event = self.events[0]
+                event = self.events.pop(0)
                 json_chunk = event
-                self.events.clear()
                 return self.chunk_parser(chunk=json_chunk)
             return GenericStreamingChunk(
                 text="",
@@ -1281,6 +1282,9 @@ class ModelResponseIterator:
                 tool_use=None,
             )
         except StopIteration:
+            if self.events:  # flush the events
+                event = self.events.pop(0)  # Remove the first event
+                return self.chunk_parser(chunk=event)
             raise StopIteration
         except ValueError as e:
             raise RuntimeError(f"Error parsing chunk: {e}")
@@ -1295,9 +1299,8 @@ class ModelResponseIterator:
             chunk = await self.async_response_iterator.__anext__()
             self.coro.send(chunk)
             if self.events:
-                event = self.events[0]
+                event = self.events.pop(0)
                 json_chunk = event
-                self.events.clear()
                 return self.chunk_parser(chunk=json_chunk)
             return GenericStreamingChunk(
                 text="",
@@ -1308,6 +1311,9 @@ class ModelResponseIterator:
                 tool_use=None,
             )
         except StopAsyncIteration:
+            if self.events:  # flush the events
+                event = self.events.pop(0)  # Remove the first event
+                return self.chunk_parser(chunk=event)
             raise StopAsyncIteration
         except ValueError as e:
             raise RuntimeError(f"Error parsing chunk: {e}")
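
The pattern behind these fixes is general: a parser can buffer several events per upstream chunk, so the iterator should drain one event per step and keep draining after the source is exhausted, instead of clearing the buffer. A standalone sketch of that drain-on-StopIteration pattern (generic code, not litellm's classes):

```python
class BufferedEventIterator:
    """Yield one buffered event per step; flush leftovers after the source ends."""

    def __init__(self, source, parser):
        self.source = iter(source)
        self.parser = parser  # turns one raw chunk into a list of 0..n events
        self.events = []

    def __iter__(self):
        return self

    def __next__(self):
        try:
            self.events.extend(self.parser(next(self.source)))
            if self.events:
                return self.events.pop(0)  # one event per step, keep the rest
            return None  # analogous to litellm's empty GenericStreamingChunk
        except StopIteration:
            if self.events:  # flush events buffered before the source ended
                return self.events.pop(0)
            raise
```

With the old `self.events.clear()` behavior, any second event decoded from the same chunk was silently dropped; popping one event per step preserves them.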

View file

@@ -428,7 +428,7 @@ def mock_completion(
     model: str,
     messages: List,
     stream: Optional[bool] = False,
-    mock_response: Union[str, Exception] = "This is a mock request",
+    mock_response: Union[str, Exception, dict] = "This is a mock request",
     mock_tool_calls: Optional[List] = None,
     logging=None,
     custom_llm_provider=None,
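
With the broadened annotation, `mock_response` accepts a string, an `Exception`, or now a `dict`; how the dict is interpreted depends on `mock_completion`'s body, which this diff does not show. The long-standing string form, for reference:

```python
import litellm

# String mock (existing behavior): returns a canned assistant message
# without calling any provider.
resp = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    mock_response="hello-world",
)
print(resp.choices[0].message.content)  # "hello-world"
```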

View file

File diff suppressed because one or more lines are too long

View file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

File diff suppressed because one or more lines are too long

View file

File diff suppressed because one or more lines are too long
View file

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-da04a591bae84617.js"],""] 3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-fd30ae439831db99.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["dDWf4yi4zCe685SxgCnWX",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 
0:["DahySukItzAH9ZoOiMmQB",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null


@@ -1,14 +1,10 @@
model_list:
  - model_name: my-fake-model
    litellm_params:
-     model: gpt-3.5-turbo
+     model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
      api_key: my-fake-key
-     mock_response: hello-world
+     aws_bedrock_runtime_endpoint: http://127.0.0.1:8000
- - model_name: gpt-4o
-   litellm_params:
-     model: azure/gpt-4o
-     api_base: https://litellm8397336933.openai.azure.com/
-     api_key: 610f806211ab47f2a694493000045858

litellm_settings:
- content_policy_fallbacks: [{"gpt-4o": ["my-fake-model"]}]
+ success_callback: ["langfuse"]
+ failure_callback: ["langfuse"]


@@ -30,6 +30,7 @@ model_list:
      api_key: os.environ/AZURE_API_KEY
      api_version: 2024-02-15-preview
      model: azure/chatgpt-v-2
+     tpm: 100
    model_name: gpt-3.5-turbo
  - litellm_params:
      model: anthropic.claude-3-sonnet-20240229-v1:0
@@ -40,6 +41,7 @@ model_list:
      api_version: 2024-02-15-preview
      model: azure/chatgpt-v-2
      drop_params: True
+     tpm: 100
    model_name: gpt-3.5-turbo
  - model_name: tts
    litellm_params:
@@ -67,8 +69,7 @@ model_list:
      max_input_tokens: 80920
litellm_settings:
- success_callback: ["langfuse"]
- failure_callback: ["langfuse"]
+ callbacks: ["dynamic_rate_limiter"]
  # default_team_settings:
  #   - team_id: proj1
  #     success_callback: ["langfuse"]


@@ -188,6 +188,9 @@ class LiteLLMRoutes(enum.Enum):
        # audio transcription
        "/audio/transcriptions",
        "/v1/audio/transcriptions",
+       # audio Speech
+       "/audio/speech",
+       "/v1/audio/speech",
        # moderations
        "/moderations",
        "/v1/moderations",


@@ -0,0 +1,205 @@
# What is this?
## Allocates dynamic tpm/rpm quota for a project based on current traffic
## Tracks num active projects per minute
import asyncio
import sys
import traceback
from datetime import datetime
from typing import List, Literal, Optional, Tuple, Union
from fastapi import HTTPException
import litellm
from litellm import ModelResponse, Router
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.router import ModelGroupInfo
from litellm.utils import get_utc_datetime
class DynamicRateLimiterCache:
"""
Thin wrapper on DualCache for this file.
Track number of active projects calling a model.
"""
def __init__(self, cache: DualCache) -> None:
self.cache = cache
self.ttl = 60 # 1 min ttl
async def async_get_cache(self, model: str) -> Optional[int]:
dt = get_utc_datetime()
current_minute = dt.strftime("%H-%M")
key_name = "{}:{}".format(current_minute, model)
_response = await self.cache.async_get_cache(key=key_name)
response: Optional[int] = None
if _response is not None:
response = len(_response)
return response
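    # Note: keys are minute-bucketed, e.g. "14-05:my-fake-model" (hypothetical time/model);
    # the returned count is the number of distinct keys seen calling the model this minute.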
async def async_set_cache_sadd(self, model: str, value: List):
"""
Add value to set.
Parameters:
- model: str, the name of the model group
- value: str, the team id
Returns:
- None
Raises:
- Exception, if unable to connect to cache client (if redis caching enabled)
"""
try:
dt = get_utc_datetime()
current_minute = dt.strftime("%H-%M")
key_name = "{}:{}".format(current_minute, model)
await self.cache.async_set_cache_sadd(
key=key_name, value=value, ttl=self.ttl
)
except Exception as e:
verbose_proxy_logger.error(
"litellm.proxy.hooks.dynamic_rate_limiter.py::async_set_cache_sadd(): Exception occured - {}\n{}".format(
str(e), traceback.format_exc()
)
)
raise e
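# A minimal usage sketch for this cache (hypothetical values) - each request SADDs its
# caller into the current minute's set, which expires with the 60s TTL:
#   await cache.async_set_cache_sadd(model="my-fake-model", value=["key-hash-1"])
#   active = await cache.async_get_cache(model="my-fake-model")  # -> 1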
class _PROXY_DynamicRateLimitHandler(CustomLogger):
# Class variables or attributes
def __init__(self, internal_usage_cache: DualCache):
self.internal_usage_cache = DynamicRateLimiterCache(cache=internal_usage_cache)
def update_variables(self, llm_router: Router):
self.llm_router = llm_router
async def check_available_tpm(
self, model: str
) -> Tuple[Optional[int], Optional[int], Optional[int]]:
"""
For a given model, get its available tpm
Returns
- Tuple[available_tpm, model_tpm, active_projects]
- available_tpm: int or null - always 0 or positive.
        - remaining_model_tpm: int or null. If this is an int, available_tpm will be an int too.
- active_projects: int or null
"""
active_projects = await self.internal_usage_cache.async_get_cache(model=model)
current_model_tpm: Optional[int] = await self.llm_router.get_model_group_usage(
model_group=model
)
model_group_info: Optional[ModelGroupInfo] = (
self.llm_router.get_model_group_info(model_group=model)
)
total_model_tpm: Optional[int] = None
if model_group_info is not None and model_group_info.tpm is not None:
total_model_tpm = model_group_info.tpm
remaining_model_tpm: Optional[int] = None
if total_model_tpm is not None and current_model_tpm is not None:
remaining_model_tpm = total_model_tpm - current_model_tpm
elif total_model_tpm is not None:
remaining_model_tpm = total_model_tpm
available_tpm: Optional[int] = None
if remaining_model_tpm is not None:
if active_projects is not None:
available_tpm = int(remaining_model_tpm / active_projects)
else:
available_tpm = remaining_model_tpm
if available_tpm is not None and available_tpm < 0:
available_tpm = 0
return available_tpm, remaining_model_tpm, active_projects
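    # Worked example (hypothetical values): model TPM=60, 30 tokens already used this
    # minute, 2 active keys -> remaining_model_tpm = 60 - 30 = 30, and
    # available_tpm = int(30 / 2) = 15, so this returns (15, 30, 2).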
async def async_pre_call_hook(
self,
user_api_key_dict: UserAPIKeyAuth,
cache: DualCache,
data: dict,
call_type: Literal[
"completion",
"text_completion",
"embeddings",
"image_generation",
"moderation",
"audio_transcription",
],
) -> Optional[
Union[Exception, str, dict]
]: # raise exception if invalid, return a str for the user to receive - if rejected, or return a modified dictionary for passing into litellm
"""
- For a model group
- Check if tpm available
- Raise RateLimitError if no tpm available
"""
if "model" in data:
available_tpm, model_tpm, active_projects = await self.check_available_tpm(
model=data["model"]
)
if available_tpm is not None and available_tpm == 0:
raise HTTPException(
status_code=429,
detail={
"error": "Key={} over available TPM={}. Model TPM={}, Active keys={}".format(
user_api_key_dict.api_key,
available_tpm,
model_tpm,
active_projects,
)
},
)
elif available_tpm is not None:
## UPDATE CACHE WITH ACTIVE PROJECT
asyncio.create_task(
self.internal_usage_cache.async_set_cache_sadd( # this is a set
model=data["model"], # type: ignore
value=[user_api_key_dict.token or "default_key"],
)
)
return None
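    # Note: a key is added to the active set only when a positive TPM allowance remains;
    # rejected (429) requests and models without a TPM limit are not tracked.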
async def async_post_call_success_hook(
self, user_api_key_dict: UserAPIKeyAuth, response
):
try:
if isinstance(response, ModelResponse):
model_info = self.llm_router.get_model_info(
id=response._hidden_params["model_id"]
)
assert (
model_info is not None
), "Model info for model with id={} is None".format(
response._hidden_params["model_id"]
)
available_tpm, remaining_model_tpm, active_projects = (
await self.check_available_tpm(model=model_info["model_name"])
)
response._hidden_params["additional_headers"] = {
"x-litellm-model_group": model_info["model_name"],
"x-ratelimit-remaining-litellm-project-tokens": available_tpm,
"x-ratelimit-remaining-model-tokens": remaining_model_tpm,
"x-ratelimit-current-active-projects": active_projects,
}
return response
return await super().async_post_call_success_hook(
user_api_key_dict, response
)
except Exception as e:
verbose_proxy_logger.error(
"litellm.proxy.hooks.dynamic_rate_limiter.py::async_post_call_success_hook(): Exception occured - {}\n{}".format(
str(e), traceback.format_exc()
)
)
return response
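# Example headers attached above (hypothetical values):
#   x-litellm-model_group: my-fake-model
#   x-ratelimit-remaining-litellm-project-tokens: 15
#   x-ratelimit-remaining-model-tokens: 30
#   x-ratelimit-current-active-projects: 2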


@@ -433,6 +433,7 @@ def get_custom_headers(
    version: Optional[str] = None,
    model_region: Optional[str] = None,
    fastest_response_batch_completion: Optional[bool] = None,
+   **kwargs,
) -> dict:
    exclude_values = {"", None}
    headers = {
@@ -448,6 +449,7 @@ def get_custom_headers(
            if fastest_response_batch_completion is not None
            else None
        ),
+       **{k: str(v) for k, v in kwargs.items()},
    }
    try:
        return {
@@ -2524,11 +2526,10 @@ async def async_data_generator(
        yield f"data: {done_message}\n\n"
    except Exception as e:
        verbose_proxy_logger.error(
-           "litellm.proxy.proxy_server.async_data_generator(): Exception occurred - {}".format(
-               str(e)
+           "litellm.proxy.proxy_server.async_data_generator(): Exception occurred - {}\n{}".format(
+               str(e), traceback.format_exc()
            )
        )
-       verbose_proxy_logger.debug(traceback.format_exc())
        await proxy_logging_obj.post_call_failure_hook(
            user_api_key_dict=user_api_key_dict,
            original_exception=e,
@@ -2644,7 +2645,9 @@ async def startup_event():
        redis_cache=redis_usage_cache
    )  # used by parallel request limiter for rate limiting keys across instances

-   proxy_logging_obj._init_litellm_callbacks()  # INITIALIZE LITELLM CALLBACKS ON SERVER STARTUP <- do this to catch any logging errors on startup, not when calls are being made
+   proxy_logging_obj._init_litellm_callbacks(
+       llm_router=llm_router
+   )  # INITIALIZE LITELLM CALLBACKS ON SERVER STARTUP <- do this to catch any logging errors on startup, not when calls are being made

    if "daily_reports" in proxy_logging_obj.slack_alerting_instance.alert_types:
        asyncio.create_task(
@@ -3061,6 +3064,14 @@ async def chat_completion(
                headers=custom_headers,
            )

+       ### CALL HOOKS ### - modify outgoing data
+       response = await proxy_logging_obj.post_call_success_hook(
+           user_api_key_dict=user_api_key_dict, response=response
+       )
+
+       hidden_params = getattr(response, "_hidden_params", {}) or {}
+       additional_headers: dict = hidden_params.get("additional_headers", {}) or {}
+
        fastapi_response.headers.update(
            get_custom_headers(
                user_api_key_dict=user_api_key_dict,
@@ -3070,14 +3081,10 @@ async def chat_completion(
                version=version,
                model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
                fastest_response_batch_completion=fastest_response_batch_completion,
+               **additional_headers,
            )
        )

-       ### CALL HOOKS ### - modify outgoing data
-       response = await proxy_logging_obj.post_call_success_hook(
-           user_api_key_dict=user_api_key_dict, response=response
-       )
        return response
    except RejectedRequestError as e:
        _data = e.request_data
@@ -3116,11 +3123,10 @@ async def chat_completion(
    except Exception as e:
        data["litellm_status"] = "fail"  # used for alerting
        verbose_proxy_logger.error(
-           "litellm.proxy.proxy_server.chat_completion(): Exception occurred - {}".format(
-               get_error_message_str(e=e)
+           "litellm.proxy.proxy_server.chat_completion(): Exception occurred - {}\n{}".format(
+               get_error_message_str(e=e), traceback.format_exc()
            )
        )
-       verbose_proxy_logger.debug(traceback.format_exc())
        await proxy_logging_obj.post_call_failure_hook(
            user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
        )
@@ -7502,6 +7508,12 @@ async def login(request: Request):
        litellm_dashboard_ui += "/ui/"
        import jwt

+       if litellm_master_key_hash is None:
+           raise HTTPException(
+               status_code=500,
+               detail={"error": "No master key set, please set LITELLM_MASTER_KEY"},
+           )
        jwt_token = jwt.encode(
            {
                "user_id": user_id,
@@ -7511,11 +7523,13 @@ async def login(request: Request):
                "login_method": "username_password",
                "premium_user": premium_user,
            },
-           "secret",
+           litellm_master_key_hash,
            algorithm="HS256",
        )
-       litellm_dashboard_ui += "?userID=" + user_id + "&token=" + jwt_token
-       return RedirectResponse(url=litellm_dashboard_ui, status_code=303)
+       litellm_dashboard_ui += "?userID=" + user_id
+       redirect_response = RedirectResponse(url=litellm_dashboard_ui, status_code=303)
+       redirect_response.set_cookie(key="token", value=jwt_token)
+       return redirect_response
    elif _user_row is not None:
        """
        When sharing invite links
@@ -7564,6 +7578,14 @@ async def login(request: Request):
            litellm_dashboard_ui += "/ui/"
            import jwt

+           if litellm_master_key_hash is None:
+               raise HTTPException(
+                   status_code=500,
+                   detail={
+                       "error": "No master key set, please set LITELLM_MASTER_KEY"
+                   },
+               )
            jwt_token = jwt.encode(
                {
                    "user_id": user_id,
@@ -7573,11 +7595,15 @@ async def login(request: Request):
                    "login_method": "username_password",
                    "premium_user": premium_user,
                },
-               "secret",
+               litellm_master_key_hash,
                algorithm="HS256",
            )
-           litellm_dashboard_ui += "?userID=" + user_id + "&token=" + jwt_token
-           return RedirectResponse(url=litellm_dashboard_ui, status_code=303)
+           litellm_dashboard_ui += "?userID=" + user_id
+           redirect_response = RedirectResponse(
+               url=litellm_dashboard_ui, status_code=303
+           )
+           redirect_response.set_cookie(key="token", value=jwt_token)
+           return redirect_response
        else:
            raise ProxyException(
                message=f"Invalid credentials used to access UI. Passed in username: {username}, passed in password: {password}.\nNot valid credentials for {username}",
@@ -7688,6 +7714,12 @@ async def onboarding(invite_link: str):
    litellm_dashboard_ui += "/ui/onboarding"
    import jwt

+   if litellm_master_key_hash is None:
+       raise HTTPException(
+           status_code=500,
+           detail={"error": "No master key set, please set LITELLM_MASTER_KEY"},
+       )
    jwt_token = jwt.encode(
        {
            "user_id": user_obj.user_id,
@@ -7697,7 +7729,7 @@ async def onboarding(invite_link: str):
            "login_method": "username_password",
            "premium_user": premium_user,
        },
-       "secret",
+       litellm_master_key_hash,
        algorithm="HS256",
    )
@@ -8108,6 +8140,12 @@ async def auth_callback(request: Request):
    import jwt

+   if litellm_master_key_hash is None:
+       raise HTTPException(
+           status_code=500,
+           detail={"error": "No master key set, please set LITELLM_MASTER_KEY"},
+       )
    jwt_token = jwt.encode(
        {
            "user_id": user_id,
@@ -8117,11 +8155,13 @@ async def auth_callback(request: Request):
            "login_method": "sso",
            "premium_user": premium_user,
        },
-       "secret",
+       litellm_master_key_hash,
        algorithm="HS256",
    )
-   litellm_dashboard_ui += "?userID=" + user_id + "&token=" + jwt_token
-   return RedirectResponse(url=litellm_dashboard_ui)
+   litellm_dashboard_ui += "?userID=" + user_id
+   redirect_response = RedirectResponse(url=litellm_dashboard_ui, status_code=303)
+   redirect_response.set_cookie(key="token", value=jwt_token)
+   return redirect_response

#### INVITATION MANAGEMENT ####


@@ -229,31 +229,32 @@ class ProxyLogging:
        if redis_cache is not None:
            self.internal_usage_cache.redis_cache = redis_cache

-   def _init_litellm_callbacks(self):
-       print_verbose("INITIALIZING LITELLM CALLBACKS!")
+   def _init_litellm_callbacks(self, llm_router: Optional[litellm.Router] = None):
        self.service_logging_obj = ServiceLogging()
-       litellm.callbacks.append(self.max_parallel_request_limiter)
+       litellm.callbacks.append(self.max_parallel_request_limiter)  # type: ignore
-       litellm.callbacks.append(self.max_budget_limiter)
+       litellm.callbacks.append(self.max_budget_limiter)  # type: ignore
-       litellm.callbacks.append(self.cache_control_check)
+       litellm.callbacks.append(self.cache_control_check)  # type: ignore
-       litellm.callbacks.append(self.service_logging_obj)
+       litellm.callbacks.append(self.service_logging_obj)  # type: ignore
        litellm.success_callback.append(
            self.slack_alerting_instance.response_taking_too_long_callback
        )
        for callback in litellm.callbacks:
            if isinstance(callback, str):
-               callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(
-                   callback
+               callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(  # type: ignore
+                   callback,
+                   internal_usage_cache=self.internal_usage_cache,
+                   llm_router=llm_router,
                )
            if callback not in litellm.input_callback:
-               litellm.input_callback.append(callback)
+               litellm.input_callback.append(callback)  # type: ignore
            if callback not in litellm.success_callback:
-               litellm.success_callback.append(callback)
+               litellm.success_callback.append(callback)  # type: ignore
            if callback not in litellm.failure_callback:
-               litellm.failure_callback.append(callback)
+               litellm.failure_callback.append(callback)  # type: ignore
            if callback not in litellm._async_success_callback:
-               litellm._async_success_callback.append(callback)
+               litellm._async_success_callback.append(callback)  # type: ignore
            if callback not in litellm._async_failure_callback:
-               litellm._async_failure_callback.append(callback)
+               litellm._async_failure_callback.append(callback)  # type: ignore
        if (
            len(litellm.input_callback) > 0
@@ -301,10 +302,19 @@ class ProxyLogging:
        try:
            for callback in litellm.callbacks:
-               if isinstance(callback, CustomLogger) and "async_pre_call_hook" in vars(
-                   callback.__class__
-               ):
-                   response = await callback.async_pre_call_hook(
+               _callback: Optional[CustomLogger] = None
+               if isinstance(callback, str):
+                   _callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
+                       callback
+                   )
+               else:
+                   _callback = callback  # type: ignore
+               if (
+                   _callback is not None
+                   and isinstance(_callback, CustomLogger)
+                   and "async_pre_call_hook" in vars(_callback.__class__)
+               ):
+                   response = await _callback.async_pre_call_hook(
                        user_api_key_dict=user_api_key_dict,
                        cache=self.call_details["user_api_key_cache"],
                        data=data,
@@ -574,8 +584,15 @@ class ProxyLogging:
        for callback in litellm.callbacks:
            try:
-               if isinstance(callback, CustomLogger):
-                   await callback.async_post_call_failure_hook(
+               _callback: Optional[CustomLogger] = None
+               if isinstance(callback, str):
+                   _callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
+                       callback
+                   )
+               else:
+                   _callback = callback  # type: ignore
+               if _callback is not None and isinstance(_callback, CustomLogger):
+                   await _callback.async_post_call_failure_hook(
                        user_api_key_dict=user_api_key_dict,
                        original_exception=original_exception,
                    )
@@ -596,8 +613,15 @@ class ProxyLogging:
        """
        for callback in litellm.callbacks:
            try:
-               if isinstance(callback, CustomLogger):
-                   await callback.async_post_call_success_hook(
+               _callback: Optional[CustomLogger] = None
+               if isinstance(callback, str):
+                   _callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
+                       callback
+                   )
+               else:
+                   _callback = callback  # type: ignore
+               if _callback is not None and isinstance(_callback, CustomLogger):
+                   await _callback.async_post_call_success_hook(
                        user_api_key_dict=user_api_key_dict, response=response
                    )
            except Exception as e:
@@ -615,11 +639,22 @@ class ProxyLogging:
        Covers:
        1. /chat/completions
        """
+       response_str: Optional[str] = None
+       if isinstance(response, ModelResponse):
+           response_str = litellm.get_response_string(response_obj=response)
+       if response_str is not None:
            for callback in litellm.callbacks:
                try:
-                   if isinstance(callback, CustomLogger):
-                       await callback.async_post_call_streaming_hook(
-                           user_api_key_dict=user_api_key_dict, response=response
+                   _callback: Optional[CustomLogger] = None
+                   if isinstance(callback, str):
+                       _callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
+                           callback
+                       )
+                   else:
+                       _callback = callback  # type: ignore
+                   if _callback is not None and isinstance(_callback, CustomLogger):
+                       await _callback.async_post_call_streaming_hook(
+                           user_api_key_dict=user_api_key_dict, response=response_str
                        )
                except Exception as e:
                    raise e


@@ -11,6 +11,7 @@ import asyncio
import concurrent
import copy
import datetime as datetime_og
+import enum
import hashlib
import inspect
import json
@@ -90,6 +91,10 @@ from litellm.utils import (
)

+class RoutingArgs(enum.Enum):
+    ttl = 60  # 1min (RPM/TPM expire key)

class Router:
    model_names: List = []
    cache_responses: Optional[bool] = False
@@ -387,6 +392,11 @@ class Router:
            routing_strategy=routing_strategy,
            routing_strategy_args=routing_strategy_args,
        )
+       ## USAGE TRACKING ##
+       if isinstance(litellm._async_success_callback, list):
+           litellm._async_success_callback.append(self.deployment_callback_on_success)
+       else:
+           litellm._async_success_callback.append(self.deployment_callback_on_success)
        ## COOLDOWNS ##
        if isinstance(litellm.failure_callback, list):
            litellm.failure_callback.append(self.deployment_callback_on_failure)
@@ -2664,13 +2674,69 @@ class Router:
                    time.sleep(_timeout)

            if type(original_exception) in litellm.LITELLM_EXCEPTION_TYPES:
-               original_exception.max_retries = num_retries
-               original_exception.num_retries = current_attempt
+               setattr(original_exception, "max_retries", num_retries)
+               setattr(original_exception, "num_retries", current_attempt)

            raise original_exception

    ### HELPER FUNCTIONS
async def deployment_callback_on_success(
self,
kwargs, # kwargs to completion
completion_response, # response from completion
start_time,
end_time, # start/end time
):
"""
Track remaining tpm/rpm quota for model in model_list
"""
try:
"""
Update TPM usage on success
"""
if kwargs["litellm_params"].get("metadata") is None:
pass
else:
model_group = kwargs["litellm_params"]["metadata"].get(
"model_group", None
)
id = kwargs["litellm_params"].get("model_info", {}).get("id", None)
if model_group is None or id is None:
return
elif isinstance(id, int):
id = str(id)
total_tokens = completion_response["usage"]["total_tokens"]
# ------------
# Setup values
# ------------
dt = get_utc_datetime()
current_minute = dt.strftime(
"%H-%M"
) # use the same timezone regardless of system clock
tpm_key = f"global_router:{id}:tpm:{current_minute}"
# ------------
# Update usage
# ------------
# update cache
## TPM
await self.cache.async_increment_cache(
key=tpm_key, value=total_tokens, ttl=RoutingArgs.ttl.value
)
except Exception as e:
verbose_router_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}\n{}".format(
str(e), traceback.format_exc()
)
)
pass
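    # Example key written above (hypothetical id/time): "global_router:model-id-123:tpm:14-05",
    # incremented by the response's total_tokens and expiring after RoutingArgs.ttl (60s).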
    def deployment_callback_on_failure(
        self,
        kwargs,  # kwargs to completion
@@ -3870,10 +3936,39 @@ class Router:
        model_group_info: Optional[ModelGroupInfo] = None

+       total_tpm: Optional[int] = None
+       total_rpm: Optional[int] = None

        for model in self.model_list:
            if "model_name" in model and model["model_name"] == model_group:
                # model in model group found #
                litellm_params = LiteLLM_Params(**model["litellm_params"])
# get model tpm
_deployment_tpm: Optional[int] = None
if _deployment_tpm is None:
_deployment_tpm = model.get("tpm", None)
if _deployment_tpm is None:
_deployment_tpm = model.get("litellm_params", {}).get("tpm", None)
if _deployment_tpm is None:
_deployment_tpm = model.get("model_info", {}).get("tpm", None)
if _deployment_tpm is not None:
if total_tpm is None:
total_tpm = 0
total_tpm += _deployment_tpm # type: ignore
# get model rpm
_deployment_rpm: Optional[int] = None
if _deployment_rpm is None:
_deployment_rpm = model.get("rpm", None)
if _deployment_rpm is None:
_deployment_rpm = model.get("litellm_params", {}).get("rpm", None)
if _deployment_rpm is None:
_deployment_rpm = model.get("model_info", {}).get("rpm", None)
if _deployment_rpm is not None:
if total_rpm is None:
total_rpm = 0
total_rpm += _deployment_rpm # type: ignore
                # get model info
                try:
                    model_info = litellm.get_model_info(model=litellm_params.model)
@@ -3987,8 +4082,44 @@ class Router:
                "supported_openai_params"
            ]

+       ## UPDATE WITH TOTAL TPM/RPM FOR MODEL GROUP
+       if total_tpm is not None and model_group_info is not None:
+           model_group_info.tpm = total_tpm
+       if total_rpm is not None and model_group_info is not None:
+           model_group_info.rpm = total_rpm

        return model_group_info
    async def get_model_group_usage(self, model_group: str) -> Optional[int]:
        """
        Returns current TPM usage (tokens consumed this minute) for a model group
        """
dt = get_utc_datetime()
current_minute = dt.strftime(
"%H-%M"
) # use the same timezone regardless of system clock
tpm_keys: List[str] = []
for model in self.model_list:
if "model_name" in model and model["model_name"] == model_group:
tpm_keys.append(
f"global_router:{model['model_info']['id']}:tpm:{current_minute}"
)
## TPM
tpm_usage_list: Optional[List] = await self.cache.async_batch_get_cache(
keys=tpm_keys
)
tpm_usage: Optional[int] = None
if tpm_usage_list is not None:
for t in tpm_usage_list:
if isinstance(t, int):
if tpm_usage is None:
tpm_usage = 0
tpm_usage += t
return tpm_usage
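    # Example (hypothetical values): two deployments in the group used 40 and 20 tokens
    # this minute -> async_batch_get_cache returns [40, 20] and this method returns 60.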
    def get_model_ids(self) -> List[str]:
        """
        Returns list of model id's.
@@ -4916,7 +5047,7 @@ class Router:
    def reset(self):
        ## clean up on close
        litellm.success_callback = []
-       litellm.__async_success_callback = []
+       litellm._async_success_callback = []
        litellm.failure_callback = []
        litellm._async_failure_callback = []
        self.retry_policy = None


@@ -4,6 +4,7 @@ import json
import logging
import os
import sys
+from typing import Any
from unittest.mock import MagicMock, patch

logging.basicConfig(level=logging.DEBUG)
@@ -24,11 +25,21 @@ import pytest
def langfuse_client():
    import langfuse

+   _langfuse_cache_key = (
+       f"{os.environ['LANGFUSE_PUBLIC_KEY']}-{os.environ['LANGFUSE_SECRET_KEY']}"
+   )
+   # use an in-memory langfuse client for testing; RAM util on ci/cd gets too high when we init many langfuse clients
+   if _langfuse_cache_key in litellm.in_memory_llm_clients_cache:
+       langfuse_client = litellm.in_memory_llm_clients_cache[_langfuse_cache_key]
+   else:
        langfuse_client = langfuse.Langfuse(
            public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
            secret_key=os.environ["LANGFUSE_SECRET_KEY"],
            host=None,
        )
+       litellm.in_memory_llm_clients_cache[_langfuse_cache_key] = langfuse_client
+       print("NEW LANGFUSE CLIENT")

    with patch(
        "langfuse.Langfuse", MagicMock(return_value=langfuse_client)


@@ -1,869 +0,0 @@
import asyncio
import copy
import json
import logging
import os
import sys
from unittest.mock import MagicMock, patch
logging.basicConfig(level=logging.DEBUG)
sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm import completion
litellm.num_retries = 3
litellm.success_callback = ["langfuse"]
os.environ["LANGFUSE_DEBUG"] = "True"
import time
import pytest
@pytest.fixture
def langfuse_client():
import langfuse
langfuse_client = langfuse.Langfuse(
public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
secret_key=os.environ["LANGFUSE_SECRET_KEY"],
host=None,
)
with patch(
"langfuse.Langfuse", MagicMock(return_value=langfuse_client)
) as mock_langfuse_client:
yield mock_langfuse_client()
def search_logs(log_file_path, num_good_logs=1):
"""
Searches the given log file for logs containing the "/api/public" string.
Parameters:
- log_file_path (str): The path to the log file to be searched.
Returns:
- None
Raises:
- Exception: If there are any bad logs found in the log file.
"""
import re
print("\n searching logs")
bad_logs = []
good_logs = []
all_logs = []
try:
with open(log_file_path, "r") as log_file:
lines = log_file.readlines()
print(f"searching logslines: {lines}")
for line in lines:
all_logs.append(line.strip())
if "/api/public" in line:
print("Found log with /api/public:")
print(line.strip())
print("\n\n")
match = re.search(
r'"POST /api/public/ingestion HTTP/1.1" (\d+) (\d+)',
line,
)
if match:
status_code = int(match.group(1))
print("STATUS CODE", status_code)
if (
status_code != 200
and status_code != 201
and status_code != 207
):
print("got a BAD log")
bad_logs.append(line.strip())
else:
good_logs.append(line.strip())
print("\nBad Logs")
print(bad_logs)
if len(bad_logs) > 0:
raise Exception(f"bad logs, Bad logs = {bad_logs}")
assert (
len(good_logs) == num_good_logs
), f"Did not get expected number of good logs, expected {num_good_logs}, got {len(good_logs)}. All logs \n {all_logs}"
print("\nGood Logs")
print(good_logs)
if len(good_logs) <= 0:
raise Exception(
f"There were no Good Logs from Langfuse. No logs with /api/public status 200. \nAll logs:{all_logs}"
)
except Exception as e:
raise e
def pre_langfuse_setup():
"""
Set up the logging for the 'pre_langfuse_setup' function.
"""
# sends logs to langfuse.log
import logging
# Configure the logging to write to a file
logging.basicConfig(filename="langfuse.log", level=logging.DEBUG)
logger = logging.getLogger()
# Add a FileHandler to the logger
file_handler = logging.FileHandler("langfuse.log", mode="w")
file_handler.setLevel(logging.DEBUG)
logger.addHandler(file_handler)
return
def test_langfuse_logging_async():
# this tests time added to make langfuse logging calls, vs just acompletion calls
try:
pre_langfuse_setup()
litellm.set_verbose = True
# Make 5 calls with an empty success_callback
litellm.success_callback = []
start_time_empty_callback = asyncio.run(make_async_calls())
print("done with no callback test")
print("starting langfuse test")
# Make 5 calls with success_callback set to "langfuse"
litellm.success_callback = ["langfuse"]
start_time_langfuse = asyncio.run(make_async_calls())
print("done with langfuse test")
# Compare the time for both scenarios
print(f"Time taken with success_callback='langfuse': {start_time_langfuse}")
print(f"Time taken with empty success_callback: {start_time_empty_callback}")
# assert the diff is not more than 1 second - this was 5 seconds before the fix
assert abs(start_time_langfuse - start_time_empty_callback) < 1
except litellm.Timeout as e:
pass
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
async def make_async_calls(metadata=None, **completion_kwargs):
tasks = []
for _ in range(5):
tasks.append(create_async_task())
# Measure the start time before running the tasks
start_time = asyncio.get_event_loop().time()
# Wait for all tasks to complete
responses = await asyncio.gather(*tasks)
# Print the responses when tasks return
for idx, response in enumerate(responses):
print(f"Response from Task {idx + 1}: {response}")
# Calculate the total time taken
total_time = asyncio.get_event_loop().time() - start_time
return total_time
def create_async_task(**completion_kwargs):
"""
Creates an async task for the litellm.acompletion function.
This is just the task, but it is not run here.
To run the task it must be awaited or used in other asyncio coroutine execution functions like asyncio.gather.
Any kwargs passed to this function will be passed to the litellm.acompletion function.
By default a standard set of arguments are used for the litellm.acompletion function.
"""
completion_args = {
"model": "azure/chatgpt-v-2",
"api_version": "2024-02-01",
"messages": [{"role": "user", "content": "This is a test"}],
"max_tokens": 5,
"temperature": 0.7,
"timeout": 5,
"user": "langfuse_latency_test_user",
"mock_response": "It's simple to use and easy to get started",
}
completion_args.update(completion_kwargs)
return asyncio.create_task(litellm.acompletion(**completion_args))
@pytest.mark.asyncio
@pytest.mark.parametrize("stream", [False, True])
async def test_langfuse_logging_without_request_response(stream, langfuse_client):
try:
import uuid
_unique_trace_name = f"litellm-test-{str(uuid.uuid4())}"
litellm.set_verbose = True
litellm.turn_off_message_logging = True
litellm.success_callback = ["langfuse"]
response = await create_async_task(
model="gpt-3.5-turbo",
stream=stream,
metadata={"trace_id": _unique_trace_name},
)
print(response)
if stream:
async for chunk in response:
print(chunk)
langfuse_client.flush()
await asyncio.sleep(2)
# get trace with _unique_trace_name
trace = langfuse_client.get_generations(trace_id=_unique_trace_name)
print("trace_from_langfuse", trace)
_trace_data = trace.data
assert _trace_data[0].input == {
"messages": [{"content": "redacted-by-litellm", "role": "user"}]
}
assert _trace_data[0].output == {
"role": "assistant",
"content": "redacted-by-litellm",
}
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
@pytest.mark.asyncio
async def test_langfuse_masked_input_output(langfuse_client):
"""
Test that creates a trace with masked input and output
"""
import uuid
for mask_value in [True, False]:
_unique_trace_name = f"litellm-test-{str(uuid.uuid4())}"
litellm.set_verbose = True
litellm.success_callback = ["langfuse"]
response = await create_async_task(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "This is a test"}],
metadata={
"trace_id": _unique_trace_name,
"mask_input": mask_value,
"mask_output": mask_value,
},
mock_response="This is a test response",
)
print(response)
expected_input = (
"redacted-by-litellm"
if mask_value
else {"messages": [{"content": "This is a test", "role": "user"}]}
)
expected_output = (
"redacted-by-litellm"
if mask_value
else {"content": "This is a test response", "role": "assistant"}
)
langfuse_client.flush()
await asyncio.sleep(2)
# get trace with _unique_trace_name
trace = langfuse_client.get_trace(id=_unique_trace_name)
generations = list(
reversed(langfuse_client.get_generations(trace_id=_unique_trace_name).data)
)
assert trace.input == expected_input
assert trace.output == expected_output
assert generations[0].input == expected_input
assert generations[0].output == expected_output
@pytest.mark.asyncio
async def test_alangfuse_logging_metadata(langfuse_client):
"""
Test that creates multiple traces, with a varying number of generations and sets various metadata fields
Confirms that no metadata that is standard within Langfuse is duplicated in the respective trace or generation metadata
For trace continuation certain metadata of the trace is overriden with metadata from the last generation based on the update_trace_keys field
Version is set for both the trace and the generation
Release is just set for the trace
Tags is just set for the trace
"""
import uuid
litellm.set_verbose = True
litellm.success_callback = ["langfuse"]
trace_identifiers = {}
expected_filtered_metadata_keys = {
"trace_name",
"trace_id",
"existing_trace_id",
"trace_user_id",
"session_id",
"tags",
"generation_name",
"generation_id",
"prompt",
}
trace_metadata = {
"trace_actual_metadata_key": "trace_actual_metadata_value"
} # Allows for setting the metadata on the trace
run_id = str(uuid.uuid4())
session_id = f"litellm-test-session-{run_id}"
trace_common_metadata = {
"session_id": session_id,
"tags": ["litellm-test-tag1", "litellm-test-tag2"],
"update_trace_keys": [
"output",
"trace_metadata",
], # Overwrite the following fields in the trace with the last generation's output and the trace_user_id
"trace_metadata": trace_metadata,
"gen_metadata_key": "gen_metadata_value", # Metadata key that should not be filtered in the generation
"trace_release": "litellm-test-release",
"version": "litellm-test-version",
}
for trace_num in range(1, 3): # Two traces
metadata = copy.deepcopy(trace_common_metadata)
trace_id = f"litellm-test-trace{trace_num}-{run_id}"
metadata["trace_id"] = trace_id
metadata["trace_name"] = trace_id
trace_identifiers[trace_id] = []
print(f"Trace: {trace_id}")
for generation_num in range(
1, trace_num + 1
): # Each trace has a number of generations equal to its trace number
metadata["trace_user_id"] = f"litellm-test-user{generation_num}-{run_id}"
generation_id = (
f"litellm-test-trace{trace_num}-generation-{generation_num}-{run_id}"
)
metadata["generation_id"] = generation_id
metadata["generation_name"] = generation_id
metadata["trace_metadata"][
"generation_id"
] = generation_id # Update to test if trace_metadata is overwritten by update trace keys
trace_identifiers[trace_id].append(generation_id)
print(f"Generation: {generation_id}")
response = await create_async_task(
model="gpt-3.5-turbo",
mock_response=f"{session_id}:{trace_id}:{generation_id}",
messages=[
{
"role": "user",
"content": f"{session_id}:{trace_id}:{generation_id}",
}
],
max_tokens=100,
temperature=0.2,
metadata=copy.deepcopy(
metadata
), # Every generation needs its own metadata, langfuse is not async/thread safe without it
)
print(response)
metadata["existing_trace_id"] = trace_id
langfuse_client.flush()
await asyncio.sleep(10)
# Tests the metadata filtering and the override of the output to be the last generation
for trace_id, generation_ids in trace_identifiers.items():
trace = langfuse_client.get_trace(id=trace_id)
assert trace.id == trace_id
assert trace.session_id == session_id
assert trace.metadata != trace_metadata
generations = list(
reversed(langfuse_client.get_generations(trace_id=trace_id).data)
)
assert len(generations) == len(generation_ids)
assert (
trace.input == generations[0].input
) # Should be set by the first generation
assert (
trace.output == generations[-1].output
) # Should be overwritten by the last generation according to update_trace_keys
assert (
trace.metadata != generations[-1].metadata
) # Should be overwritten by the last generation according to update_trace_keys
assert trace.metadata["generation_id"] == generations[-1].id
assert set(trace.tags).issuperset(trace_common_metadata["tags"])
print("trace_from_langfuse", trace)
for generation_id, generation in zip(generation_ids, generations):
assert generation.id == generation_id
assert generation.trace_id == trace_id
print(
"common keys in trace",
set(generation.metadata.keys()).intersection(
expected_filtered_metadata_keys
),
)
assert set(generation.metadata.keys()).isdisjoint(
expected_filtered_metadata_keys
)
print("generation_from_langfuse", generation)
@pytest.mark.skip(reason="beta test - checking langfuse output")
def test_langfuse_logging():
try:
pre_langfuse_setup()
litellm.set_verbose = True
response = completion(
model="claude-instant-1.2",
messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}],
max_tokens=10,
temperature=0.2,
)
print(response)
# time.sleep(5)
# # check langfuse.log to see if there was a failed response
# search_logs("langfuse.log")
except litellm.Timeout as e:
pass
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
# test_langfuse_logging()
@pytest.mark.skip(reason="beta test - checking langfuse output")
def test_langfuse_logging_stream():
try:
litellm.set_verbose = True
response = completion(
model="gpt-3.5-turbo",
messages=[
{
"role": "user",
"content": "this is a streaming test for llama2 + langfuse",
}
],
max_tokens=20,
temperature=0.2,
stream=True,
)
print(response)
for chunk in response:
pass
# print(chunk)
except litellm.Timeout as e:
pass
except Exception as e:
print(e)
# test_langfuse_logging_stream()
@pytest.mark.skip(reason="beta test - checking langfuse output")
def test_langfuse_logging_custom_generation_name():
try:
litellm.set_verbose = True
response = completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}],
max_tokens=10,
metadata={
"langfuse/foo": "bar",
"langsmith/fizz": "buzz",
"prompt_hash": "asdf98u0j9131123",
"generation_name": "ishaan-test-generation",
"generation_id": "gen-id22",
"trace_id": "trace-id22",
"trace_user_id": "user-id2",
},
)
print(response)
except litellm.Timeout as e:
pass
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
print(e)
# test_langfuse_logging_custom_generation_name()
@pytest.mark.skip(reason="beta test - checking langfuse output")
def test_langfuse_logging_embedding():
try:
litellm.set_verbose = True
litellm.success_callback = ["langfuse"]
response = litellm.embedding(
model="text-embedding-ada-002",
input=["gm", "ishaan"],
)
print(response)
except litellm.Timeout as e:
pass
except Exception as e:
pytest.fail(f"An exception occurred - {e}")
print(e)
@pytest.mark.skip(reason="beta test - checking langfuse output")
def test_langfuse_logging_function_calling():
litellm.set_verbose = True
function1 = [
{
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
}
]
try:
response = completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "what's the weather in boston"}],
temperature=0.1,
functions=function1,
)
print(response)
except litellm.Timeout as e:
pass
except Exception as e:
print(e)
# test_langfuse_logging_function_calling()
@pytest.mark.skip(reason="Need to address this on main")
def test_aaalangfuse_existing_trace_id():
"""
When existing trace id is passed, don't set trace params -> prevents overwriting the trace
Pass 1 logging object with a trace
Pass 2nd logging object with the trace id
Assert no changes to the trace
"""
# Test - if the logs were sent to the correct team on langfuse
import datetime
import litellm
from litellm.integrations.langfuse import LangFuseLogger
langfuse_Logger = LangFuseLogger(
langfuse_public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
langfuse_secret=os.getenv("LANGFUSE_PROJECT2_SECRET"),
)
litellm.success_callback = ["langfuse"]
# langfuse_args = {'kwargs': { 'start_time': 'end_time': datetime.datetime(2024, 5, 1, 7, 31, 29, 903685), 'user_id': None, 'print_verbose': <function print_verbose at 0x109d1f420>, 'level': 'DEFAULT', 'status_message': None}
response_obj = litellm.ModelResponse(
id="chatcmpl-9K5HUAbVRqFrMZKXL0WoC295xhguY",
choices=[
litellm.Choices(
finish_reason="stop",
index=0,
message=litellm.Message(
content="I'm sorry, I am an AI assistant and do not have real-time information. I recommend checking a reliable weather website or app for the most up-to-date weather information in Boston.",
role="assistant",
),
)
],
created=1714573888,
model="gpt-3.5-turbo-0125",
object="chat.completion",
system_fingerprint="fp_3b956da36b",
usage=litellm.Usage(completion_tokens=37, prompt_tokens=14, total_tokens=51),
)
### NEW TRACE ###
message = [{"role": "user", "content": "what's the weather in boston"}]
langfuse_args = {
"response_obj": response_obj,
"kwargs": {
"model": "gpt-3.5-turbo",
"litellm_params": {
"acompletion": False,
"api_key": None,
"force_timeout": 600,
"logger_fn": None,
"verbose": False,
"custom_llm_provider": "openai",
"api_base": "https://api.openai.com/v1/",
"litellm_call_id": None,
"model_alias_map": {},
"completion_call_id": None,
"metadata": None,
"model_info": None,
"proxy_server_request": None,
"preset_cache_key": None,
"no-log": False,
"stream_response": {},
},
"messages": message,
"optional_params": {"temperature": 0.1, "extra_body": {}},
"start_time": "2024-05-01 07:31:27.986164",
"stream": False,
"user": None,
"call_type": "completion",
"litellm_call_id": None,
"completion_start_time": "2024-05-01 07:31:29.903685",
"temperature": 0.1,
"extra_body": {},
"input": [{"role": "user", "content": "what's the weather in boston"}],
"api_key": "my-api-key",
"additional_args": {
"complete_input_dict": {
"model": "gpt-3.5-turbo",
"messages": [
{"role": "user", "content": "what's the weather in boston"}
],
"temperature": 0.1,
"extra_body": {},
}
},
"log_event_type": "successful_api_call",
"end_time": "2024-05-01 07:31:29.903685",
"cache_hit": None,
"response_cost": 6.25e-05,
},
"start_time": datetime.datetime(2024, 5, 1, 7, 31, 27, 986164),
"end_time": datetime.datetime(2024, 5, 1, 7, 31, 29, 903685),
"user_id": None,
"print_verbose": litellm.print_verbose,
"level": "DEFAULT",
"status_message": None,
}
langfuse_response_object = langfuse_Logger.log_event(**langfuse_args)
import langfuse
langfuse_client = langfuse.Langfuse(
public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),
secret_key=os.getenv("LANGFUSE_PROJECT2_SECRET"),
)
trace_id = langfuse_response_object["trace_id"]
assert trace_id is not None
langfuse_client.flush()
time.sleep(2)
print(langfuse_client.get_trace(id=trace_id))
initial_langfuse_trace = langfuse_client.get_trace(id=trace_id)
### EXISTING TRACE ###
new_metadata = {"existing_trace_id": trace_id}
new_messages = [{"role": "user", "content": "What do you know?"}]
new_response_obj = litellm.ModelResponse(
id="chatcmpl-9K5HUAbVRqFrMZKXL0WoC295xhguY",
choices=[
litellm.Choices(
finish_reason="stop",
index=0,
message=litellm.Message(
content="What do I know?",
role="assistant",
),
)
],
created=1714573888,
model="gpt-3.5-turbo-0125",
object="chat.completion",
system_fingerprint="fp_3b956da36b",
usage=litellm.Usage(completion_tokens=37, prompt_tokens=14, total_tokens=51),
)
langfuse_args = {
"response_obj": new_response_obj,
"kwargs": {
"model": "gpt-3.5-turbo",
"litellm_params": {
"acompletion": False,
"api_key": None,
"force_timeout": 600,
"logger_fn": None,
"verbose": False,
"custom_llm_provider": "openai",
"api_base": "https://api.openai.com/v1/",
"litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
"model_alias_map": {},
"completion_call_id": None,
"metadata": new_metadata,
"model_info": None,
"proxy_server_request": None,
"preset_cache_key": None,
"no-log": False,
"stream_response": {},
},
"messages": new_messages,
"optional_params": {"temperature": 0.1, "extra_body": {}},
"start_time": "2024-05-01 07:31:27.986164",
"stream": False,
"user": None,
"call_type": "completion",
"litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
"completion_start_time": "2024-05-01 07:31:29.903685",
"temperature": 0.1,
"extra_body": {},
"input": [{"role": "user", "content": "what's the weather in boston"}],
"api_key": "my-api-key",
"additional_args": {
"complete_input_dict": {
"model": "gpt-3.5-turbo",
"messages": [
{"role": "user", "content": "what's the weather in boston"}
],
"temperature": 0.1,
"extra_body": {},
}
},
"log_event_type": "successful_api_call",
"end_time": "2024-05-01 07:31:29.903685",
"cache_hit": None,
"response_cost": 6.25e-05,
},
"start_time": datetime.datetime(2024, 5, 1, 7, 31, 27, 986164),
"end_time": datetime.datetime(2024, 5, 1, 7, 31, 29, 903685),
"user_id": None,
"print_verbose": litellm.print_verbose,
"level": "DEFAULT",
"status_message": None,
}
langfuse_response_object = langfuse_Logger.log_event(**langfuse_args)
new_trace_id = langfuse_response_object["trace_id"]
assert new_trace_id == trace_id
langfuse_client.flush()
time.sleep(2)
print(langfuse_client.get_trace(id=trace_id))
new_langfuse_trace = langfuse_client.get_trace(id=trace_id)
initial_langfuse_trace_dict = dict(initial_langfuse_trace)
initial_langfuse_trace_dict.pop("updatedAt")
initial_langfuse_trace_dict.pop("timestamp")
new_langfuse_trace_dict = dict(new_langfuse_trace)
new_langfuse_trace_dict.pop("updatedAt")
new_langfuse_trace_dict.pop("timestamp")
assert initial_langfuse_trace_dict == new_langfuse_trace_dict
@pytest.mark.skipif(
condition=not os.environ.get("OPENAI_API_KEY", False),
reason="Authentication missing for openai",
)
def test_langfuse_logging_tool_calling():
litellm.set_verbose = True
def get_current_weather(location, unit="fahrenheit"):
"""Get the current weather in a given location"""
if "tokyo" in location.lower():
return json.dumps(
{"location": "Tokyo", "temperature": "10", "unit": "celsius"}
)
elif "san francisco" in location.lower():
return json.dumps(
{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}
)
elif "paris" in location.lower():
return json.dumps(
{"location": "Paris", "temperature": "22", "unit": "celsius"}
)
else:
return json.dumps({"location": location, "temperature": "unknown"})
messages = [
{
"role": "user",
"content": "What's the weather like in San Francisco, Tokyo, and Paris?",
}
]
tools = [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
},
}
]
response = litellm.completion(
model="gpt-3.5-turbo-1106",
messages=messages,
tools=tools,
tool_choice="auto", # auto is default, but we'll be explicit
)
print("\nLLM Response1:\n", response)
response_message = response.choices[0].message
tool_calls = response.choices[0].message.tool_calls
# test_langfuse_logging_tool_calling()
def get_langfuse_prompt(name: str):
import langfuse
from langfuse import Langfuse
try:
langfuse = Langfuse(
public_key=os.environ["LANGFUSE_DEV_PUBLIC_KEY"],
secret_key=os.environ["LANGFUSE_DEV_SK_KEY"],
host=os.environ["LANGFUSE_HOST"],
)
# Get current production version of a text prompt
prompt = langfuse.get_prompt(name=name)
return prompt
except Exception as e:
raise Exception(f"Error getting prompt: {e}")
@pytest.mark.asyncio
@pytest.mark.skip(
reason="local only test, use this to verify if we can send request to litellm proxy server"
)
async def test_make_request():
response = await litellm.acompletion(
model="openai/llama3",
api_key="sk-1234",
base_url="http://localhost:4000",
messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}],
extra_body={
"metadata": {
"tags": ["openai"],
"prompt": get_langfuse_prompt("test-chat"),
}
},
)


@@ -0,0 +1,486 @@
# What is this?
## Unit tests for `dynamic_rate_limiter.py`
import asyncio
import os
import random
import sys
import time
import traceback
import uuid
from datetime import datetime
from typing import Optional, Tuple
from dotenv import load_dotenv
load_dotenv()
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
from litellm import DualCache, Router
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.hooks.dynamic_rate_limiter import (
_PROXY_DynamicRateLimitHandler as DynamicRateLimitHandler,
)
"""
Basic test cases:
- If 1 'active' project => give all tpm
- If 2 'active' projects => divide tpm in 2
"""
@pytest.fixture
def dynamic_rate_limit_handler() -> DynamicRateLimitHandler:
internal_cache = DualCache()
return DynamicRateLimitHandler(internal_usage_cache=internal_cache)
@pytest.fixture
def mock_response() -> litellm.ModelResponse:
return litellm.ModelResponse(
**{
"id": "chatcmpl-abc123",
"object": "chat.completion",
"created": 1699896916,
"model": "gpt-3.5-turbo-0125",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": None,
"tool_calls": [
{
"id": "call_abc123",
"type": "function",
"function": {
"name": "get_current_weather",
"arguments": '{\n"location": "Boston, MA"\n}',
},
}
],
},
"logprobs": None,
"finish_reason": "tool_calls",
}
],
"usage": {"prompt_tokens": 5, "completion_tokens": 5, "total_tokens": 10},
}
)
@pytest.fixture
def user_api_key_auth() -> UserAPIKeyAuth:
return UserAPIKeyAuth()
@pytest.mark.parametrize("num_projects", [1, 2, 100])
@pytest.mark.asyncio
async def test_available_tpm(num_projects, dynamic_rate_limit_handler):
model = "my-fake-model"
## SET CACHE W/ ACTIVE PROJECTS
projects = [str(uuid.uuid4()) for _ in range(num_projects)]
await dynamic_rate_limit_handler.internal_usage_cache.async_set_cache_sadd(
model=model, value=projects
)
model_tpm = 100
llm_router = Router(
model_list=[
{
"model_name": model,
"litellm_params": {
"model": "gpt-3.5-turbo",
"api_key": "my-key",
"api_base": "my-base",
"tpm": model_tpm,
},
}
]
)
dynamic_rate_limit_handler.update_variables(llm_router=llm_router)
## CHECK AVAILABLE TPM PER PROJECT
availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
model=model
)
expected_availability = int(model_tpm / num_projects)
assert availability == expected_availability
@pytest.mark.asyncio
async def test_rate_limit_raised(dynamic_rate_limit_handler, user_api_key_auth):
"""
    Unit test. Tests that a rate limit error is raised when the quota is exhausted.
"""
from fastapi import HTTPException
model = "my-fake-model"
## SET CACHE W/ ACTIVE PROJECTS
projects = [str(uuid.uuid4())]
await dynamic_rate_limit_handler.internal_usage_cache.async_set_cache_sadd(
model=model, value=projects
)
model_tpm = 0
llm_router = Router(
model_list=[
{
"model_name": model,
"litellm_params": {
"model": "gpt-3.5-turbo",
"api_key": "my-key",
"api_base": "my-base",
"tpm": model_tpm,
},
}
]
)
dynamic_rate_limit_handler.update_variables(llm_router=llm_router)
## CHECK AVAILABLE TPM PER PROJECT
availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
model=model
)
expected_availability = int(model_tpm / 1)
assert availability == expected_availability
## CHECK if exception raised
try:
await dynamic_rate_limit_handler.async_pre_call_hook(
user_api_key_dict=user_api_key_auth,
cache=DualCache(),
data={"model": model},
call_type="completion",
)
pytest.fail("Expected this to raise HTTPexception")
except HTTPException as e:
assert e.status_code == 429 # check if rate limit error raised
pass
@pytest.mark.asyncio
async def test_base_case(dynamic_rate_limit_handler, mock_response):
"""
    If just 1 active project,
    it should get all the quota
    - allow request to go through
    - update token usage
    - exhaust all tpm with just 1 project
    - assert RateLimitError raised at 100%+1 tpm
"""
model = "my-fake-model"
## model tpm - 50
model_tpm = 50
## tpm per request - 10
setattr(
mock_response,
"usage",
litellm.Usage(prompt_tokens=5, completion_tokens=5, total_tokens=10),
)
llm_router = Router(
model_list=[
{
"model_name": model,
"litellm_params": {
"model": "gpt-3.5-turbo",
"api_key": "my-key",
"api_base": "my-base",
"tpm": model_tpm,
"mock_response": mock_response,
},
}
]
)
dynamic_rate_limit_handler.update_variables(llm_router=llm_router)
prev_availability: Optional[int] = None
allowed_fails = 1
for _ in range(5):
try:
# check availability
availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
model=model
)
## assert availability updated
if prev_availability is not None and availability is not None:
assert availability == prev_availability - 10
print(
"prev_availability={}, availability={}".format(
prev_availability, availability
)
)
prev_availability = availability
# make call
await llm_router.acompletion(
model=model, messages=[{"role": "user", "content": "hey!"}]
)
await asyncio.sleep(3)
except Exception:
if allowed_fails > 0:
allowed_fails -= 1
else:
raise
@pytest.mark.asyncio
async def test_update_cache(
dynamic_rate_limit_handler, mock_response, user_api_key_auth
):
"""
    Check if the active project count is correctly updated
"""
model = "my-fake-model"
model_tpm = 50
llm_router = Router(
model_list=[
{
"model_name": model,
"litellm_params": {
"model": "gpt-3.5-turbo",
"api_key": "my-key",
"api_base": "my-base",
"tpm": model_tpm,
"mock_response": mock_response,
},
}
]
)
dynamic_rate_limit_handler.update_variables(llm_router=llm_router)
## INITIAL ACTIVE PROJECTS - ASSERT NONE
_, _, active_projects = await dynamic_rate_limit_handler.check_available_tpm(
model=model
)
assert active_projects is None
## MAKE CALL
await dynamic_rate_limit_handler.async_pre_call_hook(
user_api_key_dict=user_api_key_auth,
cache=DualCache(),
data={"model": model},
call_type="completion",
)
await asyncio.sleep(2)
    ## ACTIVE PROJECTS AFTER CALL - ASSERT 1
_, _, active_projects = await dynamic_rate_limit_handler.check_available_tpm(
model=model
)
assert active_projects == 1
@pytest.mark.parametrize("num_projects", [2])
@pytest.mark.asyncio
async def test_multiple_projects(
dynamic_rate_limit_handler, mock_response, num_projects
):
"""
    If 2 active projects
it should split 50% each
- assert available tpm is 0 after 50%+1 tpm calls
"""
model = "my-fake-model"
model_tpm = 50
total_tokens_per_call = 10
step_tokens_per_call_per_project = total_tokens_per_call / num_projects
available_tpm_per_project = int(model_tpm / num_projects)
## SET CACHE W/ ACTIVE PROJECTS
projects = [str(uuid.uuid4()) for _ in range(num_projects)]
await dynamic_rate_limit_handler.internal_usage_cache.async_set_cache_sadd(
model=model, value=projects
)
expected_runs = int(available_tpm_per_project / step_tokens_per_call_per_project)
setattr(
mock_response,
"usage",
litellm.Usage(
prompt_tokens=5, completion_tokens=5, total_tokens=total_tokens_per_call
),
)
llm_router = Router(
model_list=[
{
"model_name": model,
"litellm_params": {
"model": "gpt-3.5-turbo",
"api_key": "my-key",
"api_base": "my-base",
"tpm": model_tpm,
"mock_response": mock_response,
},
}
]
)
dynamic_rate_limit_handler.update_variables(llm_router=llm_router)
prev_availability: Optional[int] = None
print("expected_runs: {}".format(expected_runs))
for i in range(expected_runs + 1):
# check availability
availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
model=model
)
## assert availability updated
if prev_availability is not None and availability is not None:
assert (
availability == prev_availability - step_tokens_per_call_per_project
), "Current Availability: Got={}, Expected={}, Step={}, Tokens per step={}, Initial model tpm={}".format(
availability,
            prev_availability - step_tokens_per_call_per_project,
i,
step_tokens_per_call_per_project,
model_tpm,
)
print(
"prev_availability={}, availability={}".format(
prev_availability, availability
)
)
prev_availability = availability
# make call
await llm_router.acompletion(
model=model, messages=[{"role": "user", "content": "hey!"}]
)
await asyncio.sleep(3)
# check availability
availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
model=model
)
assert availability == 0
@pytest.mark.parametrize("num_projects", [2])
@pytest.mark.asyncio
async def test_multiple_projects_e2e(
dynamic_rate_limit_handler, mock_response, num_projects
):
"""
2 parallel calls with different keys, same model
    If 2 active projects
it should split 50% each
- assert available tpm is 0 after 50%+1 tpm calls
"""
model = "my-fake-model"
model_tpm = 50
total_tokens_per_call = 10
step_tokens_per_call_per_project = total_tokens_per_call / num_projects
available_tpm_per_project = int(model_tpm / num_projects)
## SET CACHE W/ ACTIVE PROJECTS
projects = [str(uuid.uuid4()) for _ in range(num_projects)]
await dynamic_rate_limit_handler.internal_usage_cache.async_set_cache_sadd(
model=model, value=projects
)
expected_runs = int(available_tpm_per_project / step_tokens_per_call_per_project)
setattr(
mock_response,
"usage",
litellm.Usage(
prompt_tokens=5, completion_tokens=5, total_tokens=total_tokens_per_call
),
)
llm_router = Router(
model_list=[
{
"model_name": model,
"litellm_params": {
"model": "gpt-3.5-turbo",
"api_key": "my-key",
"api_base": "my-base",
"tpm": model_tpm,
"mock_response": mock_response,
},
}
]
)
dynamic_rate_limit_handler.update_variables(llm_router=llm_router)
prev_availability: Optional[int] = None
print("expected_runs: {}".format(expected_runs))
for i in range(expected_runs + 1):
# check availability
availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
model=model
)
## assert availability updated
if prev_availability is not None and availability is not None:
assert (
availability == prev_availability - step_tokens_per_call_per_project
), "Current Availability: Got={}, Expected={}, Step={}, Tokens per step={}, Initial model tpm={}".format(
availability,
            prev_availability - step_tokens_per_call_per_project,
i,
step_tokens_per_call_per_project,
model_tpm,
)
print(
"prev_availability={}, availability={}".format(
prev_availability, availability
)
)
prev_availability = availability
# make call
await llm_router.acompletion(
model=model, messages=[{"role": "user", "content": "hey!"}]
)
await asyncio.sleep(3)
# check availability
availability, _, _ = await dynamic_rate_limit_handler.check_available_tpm(
model=model
)
assert availability == 0

View file

@ -0,0 +1,52 @@
import os
import sys
from dotenv import load_dotenv
load_dotenv()
import io
# this file is to test litellm/proxy
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import asyncio
import logging
import pytest
import litellm
from litellm.proxy._types import LiteLLMRoutes
from litellm.proxy.proxy_server import router
# Configure logging
logging.basicConfig(
level=logging.DEBUG, # Set the desired logging level
format="%(asctime)s - %(levelname)s - %(message)s",
)
def test_routes_on_litellm_proxy():
"""
    Goal of this test: verify that all the critical OpenAI routes exist on the proxy server's FastAPI router.
    This prevents accidentally deleting /threads, /batches, etc.
"""
_all_routes = []
for route in router.routes:
_path_as_str = str(route.path)
if ":path" in _path_as_str:
# remove the :path
_path_as_str = _path_as_str.replace(":path", "")
_all_routes.append(_path_as_str)
print("ALL ROUTES on LiteLLM Proxy:", _all_routes)
print("\n\n")
print("ALL OPENAI ROUTES:", LiteLLMRoutes.openai_routes.value)
for route in LiteLLMRoutes.openai_routes.value:
assert route in _all_routes

View file

@ -1730,3 +1730,99 @@ async def test_router_text_completion_client():
        print(responses)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
@pytest.fixture
def mock_response() -> litellm.ModelResponse:
return litellm.ModelResponse(
**{
"id": "chatcmpl-abc123",
"object": "chat.completion",
"created": 1699896916,
"model": "gpt-3.5-turbo-0125",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": None,
"tool_calls": [
{
"id": "call_abc123",
"type": "function",
"function": {
"name": "get_current_weather",
"arguments": '{\n"location": "Boston, MA"\n}',
},
}
],
},
"logprobs": None,
"finish_reason": "tool_calls",
}
],
"usage": {"prompt_tokens": 5, "completion_tokens": 5, "total_tokens": 10},
}
)
@pytest.mark.asyncio
async def test_router_model_usage(mock_response):
"""
Test if tracking used model tpm works as expected
"""
model = "my-fake-model"
model_tpm = 100
setattr(
mock_response,
"usage",
litellm.Usage(prompt_tokens=5, completion_tokens=5, total_tokens=10),
)
print(f"mock_response: {mock_response}")
llm_router = Router(
model_list=[
{
"model_name": model,
"litellm_params": {
"model": "gpt-3.5-turbo",
"api_key": "my-key",
"api_base": "my-base",
"tpm": model_tpm,
"mock_response": mock_response,
},
}
]
)
allowed_fails = 1 # allow for changing b/w minutes
for _ in range(2):
try:
_ = await llm_router.acompletion(
model=model, messages=[{"role": "user", "content": "Hey!"}]
)
await asyncio.sleep(3)
initial_usage = await llm_router.get_model_group_usage(model_group=model)
# completion call - 10 tokens
_ = await llm_router.acompletion(
model=model, messages=[{"role": "user", "content": "Hey!"}]
)
await asyncio.sleep(3)
updated_usage = await llm_router.get_model_group_usage(model_group=model)
assert updated_usage == initial_usage + 10 # type: ignore
break
except Exception as e:
if allowed_fails > 0:
print(
f"Decrementing allowed_fails: {allowed_fails}.\nReceived error - {str(e)}"
)
allowed_fails -= 1
else:
print(f"allowed_fails: {allowed_fails}")
raise e

View file

@ -742,7 +742,9 @@ def test_completion_palm_stream():
# test_completion_palm_stream()


@pytest.mark.parametrize("sync_mode", [False])  # True,
@pytest.mark.asyncio
async def test_completion_gemini_stream(sync_mode):
    try:
        litellm.set_verbose = True
        print("Streaming gemini response")
@ -750,29 +752,58 @@ def test_completion_gemini_stream():
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": "Who was Alexander?",
            },
        ]
        print("testing gemini streaming")
        complete_response = ""
        # Add any assertions here to check the response
        non_empty_chunks = 0
        if sync_mode:
            response = completion(
                model="gemini/gemini-1.5-flash",
                messages=messages,
                stream=True,
            )
            for idx, chunk in enumerate(response):
                print(chunk)
                # print(chunk.choices[0].delta)
                chunk, finished = streaming_format_tests(idx, chunk)
                if finished:
                    break
                non_empty_chunks += 1
                complete_response += chunk
        else:
            response = await litellm.acompletion(
                model="gemini/gemini-1.5-flash",
                messages=messages,
                stream=True,
            )
            idx = 0
            async for chunk in response:
                print(chunk)
                # print(chunk.choices[0].delta)
                chunk, finished = streaming_format_tests(idx, chunk)
                if finished:
                    break
                non_empty_chunks += 1
                complete_response += chunk
                idx += 1
        if complete_response.strip() == "":
            raise Exception("Empty response received")
        print(f"completion_response: {complete_response}")
        assert non_empty_chunks > 1
    except litellm.InternalServerError as e:
        pass
    except litellm.RateLimitError as e:
        pass
    except Exception as e:
        # if "429 Resource has been exhausted":
        #     return
        pytest.fail(f"Error occurred: {e}")

View file

@ -443,6 +443,8 @@ class ModelGroupInfo(BaseModel):
"chat", "embedding", "completion", "image_generation", "audio_transcription" "chat", "embedding", "completion", "image_generation", "audio_transcription"
] ]
] = Field(default="chat") ] = Field(default="chat")
tpm: Optional[int] = None
rpm: Optional[int] = None
supports_parallel_function_calling: bool = Field(default=False) supports_parallel_function_calling: bool = Field(default=False)
supports_vision: bool = Field(default=False) supports_vision: bool = Field(default=False)
supports_function_calling: bool = Field(default=False) supports_function_calling: bool = Field(default=False)

View file

@ -340,14 +340,15 @@ def function_setup(
    )
    try:
        global callback_list, add_breadcrumb, user_logger_fn, Logging
        function_id = kwargs["id"] if "id" in kwargs else None
        if len(litellm.callbacks) > 0:
            for callback in litellm.callbacks:
                # check if callback is a string - e.g. "lago", "openmeter"
                if isinstance(callback, str):
                    callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(  # type: ignore
                        callback, internal_usage_cache=None, llm_router=None
                    )
                    if any(
                        isinstance(cb, type(callback))
@ -3895,12 +3896,16 @@ def get_formatted_prompt(
def get_response_string(response_obj: ModelResponse) -> str:
    _choices: List[Union[Choices, StreamingChoices]] = response_obj.choices

    response_str = ""
    for choice in _choices:
        if isinstance(choice, Choices):
            if choice.message.content is not None:
                response_str += choice.message.content
        elif isinstance(choice, StreamingChoices):
            if choice.delta.content is not None:
                response_str += choice.delta.content
    return response_str
@ -9590,6 +9595,11 @@ class CustomStreamWrapper:
                litellm.request_timeout
            )
            if self.logging_obj is not None:
                ## LOGGING
                threading.Thread(
                    target=self.logging_obj.failure_handler,
                    args=(e, traceback_exception),
                ).start()  # log response
            # Handle any exceptions that might occur during streaming
            asyncio.create_task(
                self.logging_obj.async_failure_handler(e, traceback_exception)
@ -9597,11 +9607,24 @@ class CustomStreamWrapper:
            raise e
        except Exception as e:
            traceback_exception = traceback.format_exc()
            if self.logging_obj is not None:
                ## LOGGING
                threading.Thread(
                    target=self.logging_obj.failure_handler,
                    args=(e, traceback_exception),
                ).start()  # log response
            # Handle any exceptions that might occur during streaming
            asyncio.create_task(
                self.logging_obj.async_failure_handler(e, traceback_exception)  # type: ignore
            )
            ## Map to OpenAI Exception
            raise exception_type(
                model=self.model,
                custom_llm_provider=self.custom_llm_provider,
                original_exception=e,
                completion_kwargs={},
                extra_kwargs={},
            )


class TextCompletionStreamWrapper:

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.40.24"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -90,7 +90,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.40.24"
version_files = [
    "pyproject.toml:^version"
]

File diff suppressed because one or more lines are too long

View file

@ -1 +0,0 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{61994:function(e,s,l){Promise.resolve().then(l.bind(l,667))},667:function(e,s,l){"use strict";l.r(s),l.d(s,{default:function(){return _}});var t=l(3827),a=l(64090),r=l(47907),n=l(16450),i=l(18190),o=l(13810),u=l(10384),c=l(46453),d=l(71801),m=l(52273),h=l(42440),x=l(30953),j=l(777),p=l(37963),f=l(60620),g=l(1861);function _(){let[e]=f.Z.useForm(),s=(0,r.useSearchParams)();s.get("token");let l=s.get("id"),[_,Z]=(0,a.useState)(null),[w,b]=(0,a.useState)(""),[N,S]=(0,a.useState)(""),[k,y]=(0,a.useState)(null),[v,E]=(0,a.useState)(""),[F,I]=(0,a.useState)("");return(0,a.useEffect)(()=>{l&&(0,j.W_)(l).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let l=e.token,t=(0,p.o)(l);I(l),console.log("decoded:",t),Z(t.key),console.log("decoded user email:",t.user_email),S(t.user_email),y(t.user_id)})},[l]),(0,t.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,t.jsxs)(o.Z,{children:[(0,t.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,t.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,t.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,t.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,t.jsxs)(c.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,t.jsx)(u.Z,{children:"SSO is under the Enterprise Tirer."}),(0,t.jsx)(u.Z,{children:(0,t.jsx)(n.Z,{variant:"primary",className:"mb-2",children:(0,t.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,t.jsxs)(f.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",F,"formValues:",e),_&&F&&(e.user_email=N,k&&l&&(0,j.m_)(_,l,k,e.password).then(e=>{var s;let l="/ui/";console.log("redirecting to:",l+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+F),window.location.href=l}))},children:[(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(f.Z.Item,{label:"Email Address",name:"user_email",children:(0,t.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,t.jsx)(f.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,t.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,t.jsx)("div",{className:"mt-10",children:(0,t.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,294,684,777,971,69,744],function(){return e(e.s=61994)}),_N_E=e.O()}]);

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{61994:function(e,s,t){Promise.resolve().then(t.bind(t,667))},667:function(e,s,t){"use strict";t.r(s),t.d(s,{default:function(){return _}});var l=t(3827),n=t(64090),a=t(47907),r=t(16450),i=t(18190),o=t(13810),c=t(10384),u=t(46453),d=t(71801),m=t(52273),h=t(42440),x=t(30953),p=t(777),f=t(37963),j=t(60620),g=t(1861);function _(){let[e]=j.Z.useForm(),s=(0,a.useSearchParams)();!function(e){console.log("COOKIES",document.cookie);let s=document.cookie.split("; ").find(s=>s.startsWith(e+"="));s&&s.split("=")[1]}("token");let t=s.get("id"),[_,Z]=(0,n.useState)(null),[k,w]=(0,n.useState)(""),[S,b]=(0,n.useState)(""),[N,y]=(0,n.useState)(null),[v,E]=(0,n.useState)(""),[I,O]=(0,n.useState)("");return(0,n.useEffect)(()=>{t&&(0,p.W_)(t).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let t=e.token,l=(0,f.o)(t);O(t),console.log("decoded:",l),Z(l.key),console.log("decoded user email:",l.user_email),b(l.user_email),y(l.user_id)})},[t]),(0,l.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,l.jsxs)(o.Z,{children:[(0,l.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,l.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,l.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,l.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,l.jsxs)(u.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,l.jsx)(c.Z,{children:"SSO is under the Enterprise Tirer."}),(0,l.jsx)(c.Z,{children:(0,l.jsx)(r.Z,{variant:"primary",className:"mb-2",children:(0,l.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,l.jsxs)(j.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",I,"formValues:",e),_&&I&&(e.user_email=S,N&&t&&(0,p.m_)(_,t,N,e.password).then(e=>{var s;let t="/ui/";console.log("redirecting to:",t+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+I),window.location.href=t}))},children:[(0,l.jsxs)(l.Fragment,{children:[(0,l.jsx)(j.Z.Item,{label:"Email Address",name:"user_email",children:(0,l.jsx)(m.Z,{type:"email",disabled:!0,value:S,defaultValue:S,className:"max-w-md"})}),(0,l.jsx)(j.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,l.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,l.jsx)("div",{className:"mt-10",children:(0,l.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,294,684,777,971,69,744],function(){return e(e.s=61994)}),_N_E=e.O()}]);

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-f76791513e294b30.js\",\"931\",\"static/chunks/app/page-5b9334558218205d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"dDWf4yi4zCe685SxgCnWX\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-f76791513e294b30.js\",\"931\",\"static/chunks/app/page-42b04008af7da690.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"DahySukItzAH9ZoOiMmQB\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, 
initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[48951,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-5b9334558218205d.js"],""] 3:I[48951,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-42b04008af7da690.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["dDWf4yi4zCe685SxgCnWX",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 0:["DahySukItzAH9ZoOiMmQB",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

File diff suppressed because one or more lines are too long

View file

@ -2,6 +2,6 @@
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-f76791513e294b30.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["dDWf4yi4zCe685SxgCnWX",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 
0:["DahySukItzAH9ZoOiMmQB",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-da04a591bae84617.js"],""] 3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-fd30ae439831db99.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["dDWf4yi4zCe685SxgCnWX",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 
0:["DahySukItzAH9ZoOiMmQB",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -20,10 +20,19 @@ import {
} from "@/components/networking"; } from "@/components/networking";
import { jwtDecode } from "jwt-decode"; import { jwtDecode } from "jwt-decode";
import { Form, Button as Button2, message } from "antd"; import { Form, Button as Button2, message } from "antd";
function getCookie(name: string) {
console.log("COOKIES", document.cookie)
const cookieValue = document.cookie
.split('; ')
.find(row => row.startsWith(name + '='));
return cookieValue ? cookieValue.split('=')[1] : null;
}
export default function Onboarding() { export default function Onboarding() {
const [form] = Form.useForm(); const [form] = Form.useForm();
const searchParams = useSearchParams(); const searchParams = useSearchParams();
const token = searchParams.get("token"); const token = getCookie('token');
const inviteID = searchParams.get("id"); const inviteID = searchParams.get("id");
const [accessToken, setAccessToken] = useState<string | null>(null); const [accessToken, setAccessToken] = useState<string | null>(null);
const [defaultUserEmail, setDefaultUserEmail] = useState<string>(""); const [defaultUserEmail, setDefaultUserEmail] = useState<string>("");

View file

@ -19,6 +19,15 @@ import CacheDashboard from "@/components/cache_dashboard";
import { jwtDecode } from "jwt-decode"; import { jwtDecode } from "jwt-decode";
import { Typography } from "antd"; import { Typography } from "antd";
function getCookie(name: string) {
console.log("COOKIES", document.cookie)
const cookieValue = document.cookie
.split('; ')
.find(row => row.startsWith(name + '='));
return cookieValue ? cookieValue.split('=')[1] : null;
}
function formatUserRole(userRole: string) { function formatUserRole(userRole: string) {
if (!userRole) { if (!userRole) {
return "Undefined Role"; return "Undefined Role";
@ -68,7 +77,7 @@ const CreateKeyPage = () => {
const searchParams = useSearchParams(); const searchParams = useSearchParams();
const [modelData, setModelData] = useState<any>({ data: [] }); const [modelData, setModelData] = useState<any>({ data: [] });
const userID = searchParams.get("userID"); const userID = searchParams.get("userID");
const token = searchParams.get("token"); const token = getCookie('token');
const [page, setPage] = useState("api-keys"); const [page, setPage] = useState("api-keys");
const [accessToken, setAccessToken] = useState<string | null>(null); const [accessToken, setAccessToken] = useState<string | null>(null);

View file

@ -24,6 +24,14 @@ type UserSpendData = {
  max_budget?: number | null;
};

function getCookie(name: string) {
  console.log("COOKIES", document.cookie)
  const cookieValue = document.cookie
    .split('; ')
    .find(row => row.startsWith(name + '='));
  return cookieValue ? cookieValue.split('=')[1] : null;
}

interface UserDashboardProps {
  userID: string | null;
  userRole: string | null;
@ -66,7 +74,8 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
  const viewSpend = searchParams.get("viewSpend");
  const router = useRouter();
  const token = getCookie('token');
  const [accessToken, setAccessToken] = useState<string | null>(null);
  const [teamSpend, setTeamSpend] = useState<number | null>(null);
  const [userModels, setUserModels] = useState<string[]>([]);