Merge branch 'main' into litellm_invite_link_flow_2

Ishaan Jaff 2024-05-31 08:14:52 -07:00 committed by GitHub
commit f9862be049
100 changed files with 5297 additions and 883 deletions


@ -2,7 +2,7 @@ version: 4.3.4
jobs:
local_testing:
docker:
- image: circleci/python:3.9
- image: circleci/python:3.11.8
working_directory: ~/project
steps:
@ -43,7 +43,7 @@ jobs:
pip install "langfuse==2.27.1"
pip install "logfire==0.29.0"
pip install numpydoc
pip install traceloop-sdk==0.18.2
pip install traceloop-sdk==0.21.1
pip install openai
pip install prisma
pip install "httpx==0.24.1"
@ -61,6 +61,7 @@ jobs:
pip install prometheus-client==0.20.0
pip install "pydantic==2.7.1"
pip install "diskcache==5.6.1"
pip install "Pillow==10.3.0"
- save_cache:
paths:
- ./venv


@ -7,6 +7,5 @@ cohere
redis
anthropic
orjson
pydantic==1.10.14
pydantic==2.7.1
google-cloud-aiplatform==1.43.0
redisvl==0.0.7 # semantic caching


@ -1,3 +1,6 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Batching Completion()
LiteLLM allows you to:
* Send many completion calls to 1 model
@ -51,6 +54,9 @@ This makes parallel calls to the specified `models` and returns the first respon
Use this to reduce latency
<Tabs>
<TabItem value="sdk" label="SDK">
### Example Code
```python
import litellm
@ -68,8 +74,93 @@ response = batch_completion_models(
print(result)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
[how to set up proxy config](#example-setup)
Just pass a comma-separated string of model names and the flag `fastest_response=True`.
<Tabs>
<TabItem value="curl" label="curl">
```bash
curl -X POST 'http://localhost:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "gpt-4o, groq-llama", # 👈 Comma-separated models
"messages": [
{
"role": "user",
"content": "What's the weather like in Boston today?"
}
],
"stream": true,
"fastest_response": true # 👈 FLAG
}
'
```
</TabItem>
<TabItem value="openai" label="OpenAI SDK">
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
model="gpt-4o, groq-llama", # 👈 Comma-separated models
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
extra_body={"fastest_response": true} # 👈 FLAG
)
print(response)
```
</TabItem>
</Tabs>
---
### Example Setup:
```yaml
model_list:
- model_name: groq-llama
litellm_params:
model: groq/llama3-8b-8192
api_key: os.environ/GROQ_API_KEY
- model_name: gpt-4o
litellm_params:
model: gpt-4o
api_key: os.environ/OPENAI_API_KEY
```
```bash
litellm --config /path/to/config.yaml
# RUNNING on http://0.0.0.0:4000
```
</TabItem>
</Tabs>
### Output
Returns the first response
Returns the first response in OpenAI format. Cancels other LLM API calls.
```json
{
"object": "chat.completion",
@ -95,6 +186,7 @@ Returns the first response
}
```
## Send 1 completion call to many models: Return All Responses
This makes parallel calls to the specified models and returns all responses
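A minimal SDK sketch of this mode, using litellm's `batch_completion_models_all_responses` helper; the model names and prompt below are illustrative:
```python
import litellm

# send one prompt to several models and collect every response (not just the fastest)
responses = litellm.batch_completion_models_all_responses(
    models=["gpt-3.5-turbo", "claude-3-haiku-20240307"],  # illustrative model names
    messages=[{"role": "user", "content": "write a short poem"}],
    max_tokens=100,
)

for r in responses:
    print(r.choices[0].message.content)
```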


@ -178,23 +178,26 @@ curl -X GET --location 'http://0.0.0.0:4000/health/services?service=webhook' \
}
```
**API Spec for Webhook Event**
## **API Spec for Webhook Event**
- `spend` *float*: The current spend amount for the 'event_group'.
- `max_budget` *float*: The maximum allowed budget for the 'event_group'.
- `max_budget` *float or null*: The maximum allowed budget for the 'event_group'. null if not set.
- `token` *str*: A hashed value of the key, used for authentication or identification purposes.
- `user_id` *str or null*: The ID of the user associated with the event (optional).
- `customer_id` *str or null*: The ID of the customer associated with the event (optional).
- `internal_user_id` *str or null*: The ID of the internal user associated with the event (optional).
- `team_id` *str or null*: The ID of the team associated with the event (optional).
- `user_email` *str or null*: The email of the user associated with the event (optional).
- `user_email` *str or null*: The email of the internal user associated with the event (optional).
- `key_alias` *str or null*: An alias for the key associated with the event (optional).
- `projected_exceeded_date` *str or null*: The date when the budget is projected to be exceeded, returned when 'soft_budget' is set for key (optional).
- `projected_spend` *float or null*: The projected spend amount, returned when 'soft_budget' is set for key (optional).
- `event` *Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"]*: The type of event that triggered the webhook. Possible values are:
* "spend_tracked": Emitted whenver spend is tracked for a customer id.
* "budget_crossed": Indicates that the spend has exceeded the max budget.
* "threshold_crossed": Indicates that spend has crossed a threshold (currently sent when 85% and 95% of budget is reached).
* "projected_limit_exceeded": For "key" only - Indicates that the projected spend is expected to exceed the soft budget threshold.
- `event_group` *Literal["user", "key", "team", "proxy"]*: The group associated with the event. Possible values are:
* "user": The event is related to a specific user.
- `event_group` *Literal["customer", "internal_user", "key", "team", "proxy"]*: The group associated with the event. Possible values are:
* "customer": The event is related to a specific customer
* "internal_user": The event is related to a specific internal user.
* "key": The event is related to a specific key.
* "team": The event is related to a team.
* "proxy": The event is related to a proxy.


@ -0,0 +1,251 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# 🙋‍♂️ Customers
Track spend, set budgets for your customers.
## Tracking Customer Credit
### 1. Make LLM API call w/ Customer ID
Make a /chat/completions call and pass 'user' - the first call works.
```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-1234' \ # 👈 YOUR PROXY KEY
--data ' {
"model": "azure-gpt-3.5",
"user": "ishaan3", # 👈 CUSTOMER ID
"messages": [
{
"role": "user",
"content": "what time is it"
}
]
}'
```
The customer_id will be upserted into the DB with the new spend.
If the customer_id already exists, spend will be incremented.
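The same tracking works when calling the proxy through the OpenAI SDK; a sketch assuming the proxy from the curl above (`http://0.0.0.0:4000`, key `sk-1234`):
```python
import openai

client = openai.OpenAI(
    base_url="http://0.0.0.0:4000",  # LiteLLM proxy
    api_key="sk-1234",               # 👈 your proxy key
)

response = client.chat.completions.create(
    model="azure-gpt-3.5",
    user="ishaan3",  # 👈 customer id, upserted/tracked by the proxy
    messages=[{"role": "user", "content": "what time is it"}],
)
print(response.choices[0].message.content)
```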
### 2. Get Customer Spend
<Tabs>
<TabItem value="all-up" label="All-up spend">
Call `/customer/info` to get a customer's all-up spend
```bash
curl -X GET 'http://0.0.0.0:4000/customer/info?end_user_id=ishaan3' \ # 👈 CUSTOMER ID
-H 'Authorization: Bearer sk-1234' \ # 👈 YOUR PROXY KEY
```
Expected Response:
```json
{
"user_id": "ishaan3",
"blocked": false,
"alias": null,
"spend": 0.001413,
"allowed_model_region": null,
"default_model": null,
"litellm_budget_table": null
}
```
</TabItem>
<TabItem value="event-webhook" label="Event Webhook">
To update spend in your client-side DB, point the proxy to your webhook.
E.g. if your server is `https://webhook.site` and you're listening on `6ab090e8-c55f-4a23-b075-3209f5c57906`
1. Add webhook url to your proxy environment:
```bash
export WEBHOOK_URL="https://webhook.site/6ab090e8-c55f-4a23-b075-3209f5c57906"
```
2. Add 'webhook' to config.yaml
```yaml
general_settings:
alerting: ["webhook"] # 👈 KEY CHANGE
```
3. Test it!
```bash
curl -X POST 'http://localhost:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "mistral",
"messages": [
{
"role": "user",
"content": "What's the weather like in Boston today?"
}
],
"user": "krrish12"
}
'
```
Expected Response
```json
{
"spend": 0.0011120000000000001, # 👈 SPEND
"max_budget": null,
"token": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
"customer_id": "krrish12", # 👈 CUSTOMER ID
"user_id": null,
"team_id": null,
"user_email": null,
"key_alias": null,
"projected_exceeded_date": null,
"projected_spend": null,
"event": "spend_tracked",
"event_group": "customer",
"event_message": "Customer spend tracked. Customer=krrish12, spend=0.0011120000000000001"
}
```
[See Webhook Spec](./alerting.md#api-spec-for-webhook-event)
</TabItem>
</Tabs>
## Setting Customer Budgets
Set customer budgets (e.g. monthly budgets, tpm/rpm limits) on LiteLLM Proxy
### Quick Start
Create / Update a customer with budget
**Create New Customer w/ budget**
```bash
curl -X POST 'http://0.0.0.0:4000/customer/new' \
-H 'Authorization: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{
"user_id" : "my-customer-id",
"max_budget": "0" # 👈 CAN BE FLOAT
}'
```
**Test it!**
```bash
curl -X POST 'http://localhost:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "mistral",
"messages": [
{
"role": "user",
"content": "What'\''s the weather like in Boston today?"
}
],
"user": "ishaan-jaff-48"
}'
```
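Once the customer created above exceeds its `max_budget`, further requests are rejected. A sketch of handling that from the OpenAI SDK, assuming the proxy surfaces the rejection as a 401 budget-exceeded error (as in the budget error example shown later in these docs):
```python
import openai

client = openai.OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

try:
    client.chat.completions.create(
        model="mistral",
        messages=[{"role": "user", "content": "What's the weather like in Boston today?"}],
        user="my-customer-id",  # 👈 customer created above with max_budget 0
    )
except openai.AuthenticationError as e:
    # the proxy returns a 401 once the customer's budget is exceeded
    print("Budget exceeded:", e)
```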
### Assign Pricing Tiers
Create and assign customers to pricing tiers.
#### 1. Create a budget
<Tabs>
<TabItem value="ui" label="UI">
- Go to the 'Budgets' tab on the UI.
- Click on '+ Create Budget'.
- Create your pricing tier (e.g. 'my-free-tier' with budget $4). This means each user on this pricing tier will have a max budget of $4.
<Image img={require('../../img/create_budget_modal.png')} />
</TabItem>
<TabItem value="api" label="API">
Use the `/budget/new` endpoint for creating a new budget. [API Reference](https://litellm-api.up.railway.app/#/budget%20management/new_budget_budget_new_post)
```bash
curl -X POST 'http://localhost:4000/budget/new' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"budget_id": "my-free-tier",
"max_budget": 4
}'
```
</TabItem>
</Tabs>
#### 2. Assign Budget to Customer
In your application code, assign budget when creating a new customer.
Just use the `budget_id` used when creating the budget. In our example, this is `my-free-tier`.
```bash
curl -X POST 'http://localhost:4000/customer/new' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"user_id": "my-customer-id",
"budget_id": "my-free-tier" # 👈 KEY CHANGE
}'
```
#### 3. Test it!
<Tabs>
<TabItem value="curl" label="curl">
```bash
curl -X POST 'http://localhost:4000/customer/new' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"user_id": "my-customer-id",
"budget_id": "my-free-tier" # 👈 KEY CHANGE
}'
```
</TabItem>
<TabItem value="openai" label="OpenAI">
```python
from openai import OpenAI
client = OpenAI(
base_url="<your_proxy_base_url",
api_key="<your_proxy_key>"
)
completion = client.chat.completions.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"}
],
user="my-customer-id"
)
print(completion.choices[0].message)
```
</TabItem>
</Tabs>


@ -223,7 +223,7 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
Error
```shell
{"error":{"message":"Authentication Error, ExceededBudget: User ishaan3 has exceeded their budget. Current spend: 0.0008869999999999999; Max Budget: 0.0001","type":"auth_error","param":"None","code":401}}%
{"error":{"message":"Budget has been exceeded: User ishaan3 has exceeded their budget. Current spend: 0.0008869999999999999; Max Budget: 0.0001","type":"auth_error","param":"None","code":401}}%
```
</TabItem>

Binary file added (193 KiB image); contents not shown.


@ -41,6 +41,7 @@ const sidebars = {
"proxy/reliability",
"proxy/cost_tracking",
"proxy/users",
"proxy/customers",
"proxy/billing",
"proxy/user_keys",
"proxy/enterprise",


@ -6,7 +6,13 @@ warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*
import threading, requests, os
from typing import Callable, List, Optional, Dict, Union, Any, Literal
from litellm.caching import Cache
from litellm._logging import set_verbose, _turn_on_debug, verbose_logger, json_logs
from litellm._logging import (
set_verbose,
_turn_on_debug,
verbose_logger,
json_logs,
_turn_on_json,
)
from litellm.proxy._types import (
KeyManagementSystem,
KeyManagementSettings,
@ -221,7 +227,7 @@ default_team_settings: Optional[List] = None
max_user_budget: Optional[float] = None
max_end_user_budget: Optional[float] = None
#### RELIABILITY ####
request_timeout: Optional[float] = 6000
request_timeout: float = 6000
num_retries: Optional[int] = None # per model endpoint
default_fallbacks: Optional[List] = None
fallbacks: Optional[List] = None
@ -298,6 +304,7 @@ api_base = None
headers = None
api_version = None
organization = None
project = None
config_path = None
####### COMPLETION MODELS ###################
open_ai_chat_completion_models: List = []
@ -797,3 +804,4 @@ from .budget_manager import BudgetManager
from .proxy.proxy_cli import run_server
from .router import Router
from .assistants.main import *
from .batches.main import *


@ -39,6 +39,16 @@ verbose_proxy_logger.addHandler(handler)
verbose_logger.addHandler(handler)
def _turn_on_json():
handler = logging.StreamHandler()
handler.setLevel(logging.DEBUG)
handler.setFormatter(JsonFormatter())
verbose_router_logger.addHandler(handler)
verbose_proxy_logger.addHandler(handler)
verbose_logger.addHandler(handler)
def _turn_on_debug():
verbose_logger.setLevel(level=logging.DEBUG) # set package log to debug
verbose_router_logger.setLevel(level=logging.DEBUG) # set router logs to debug
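A quick sketch of what the new helper enables; calling the private `_turn_on_json()` directly is illustrative only, it just attaches the `JsonFormatter` handlers shown above to litellm's loggers:
```python
from litellm._logging import _turn_on_json, verbose_logger

_turn_on_json()  # attach JSON-formatting handlers to the litellm loggers
verbose_logger.warning("this record is now emitted as structured JSON")
```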

litellm/batches/main.py (new file, 589 lines added)

@ -0,0 +1,589 @@
"""
Main File for Batches API implementation
https://platform.openai.com/docs/api-reference/batch
- create_batch()
- retrieve_batch()
- cancel_batch()
- list_batch()
"""
import os
import asyncio
from functools import partial
import contextvars
from typing import Literal, Optional, Dict, Coroutine, Any, Union
import httpx
import litellm
from litellm import client
from litellm.utils import supports_httpx_timeout
from ..types.router import *
from ..llms.openai import OpenAIBatchesAPI, OpenAIFilesAPI
from ..types.llms.openai import (
CreateBatchRequest,
RetrieveBatchRequest,
CancelBatchRequest,
CreateFileRequest,
FileTypes,
FileObject,
Batch,
FileContentRequest,
HttpxBinaryResponseContent,
)
####### ENVIRONMENT VARIABLES ###################
openai_batches_instance = OpenAIBatchesAPI()
openai_files_instance = OpenAIFilesAPI()
#################################################
async def acreate_file(
file: FileTypes,
purpose: Literal["assistants", "batch", "fine-tune"],
custom_llm_provider: Literal["openai"] = "openai",
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
) -> Coroutine[Any, Any, FileObject]:
"""
Async: Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.
LiteLLM Equivalent of POST https://api.openai.com/v1/files
"""
try:
loop = asyncio.get_event_loop()
kwargs["acreate_file"] = True
# Use a partial function to pass your keyword arguments
func = partial(
create_file,
file,
purpose,
custom_llm_provider,
extra_headers,
extra_body,
**kwargs,
)
# Add the context to the function
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response # type: ignore
return response
except Exception as e:
raise e
def create_file(
file: FileTypes,
purpose: Literal["assistants", "batch", "fine-tune"],
custom_llm_provider: Literal["openai"] = "openai",
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
) -> Union[FileObject, Coroutine[Any, Any, FileObject]]:
"""
Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.
LiteLLM Equivalent of POST https://api.openai.com/v1/files
"""
try:
optional_params = GenericLiteLLMParams(**kwargs)
if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = (
optional_params.api_base
or litellm.api_base
or os.getenv("OPENAI_API_BASE")
or "https://api.openai.com/v1"
)
organization = (
optional_params.organization
or litellm.organization
or os.getenv("OPENAI_ORGANIZATION", None)
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
)
# set API KEY
api_key = (
optional_params.api_key
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
or litellm.openai_key
or os.getenv("OPENAI_API_KEY")
)
### TIMEOUT LOGIC ###
timeout = (
optional_params.timeout or kwargs.get("request_timeout", 600) or 600
)
# set timeout for 10 minutes by default
if (
timeout is not None
and isinstance(timeout, httpx.Timeout)
and supports_httpx_timeout(custom_llm_provider) == False
):
read_timeout = timeout.read or 600
timeout = read_timeout # default 10 min timeout
elif timeout is not None and not isinstance(timeout, httpx.Timeout):
timeout = float(timeout) # type: ignore
elif timeout is None:
timeout = 600.0
_create_file_request = CreateFileRequest(
file=file,
purpose=purpose,
extra_headers=extra_headers,
extra_body=extra_body,
)
_is_async = kwargs.pop("acreate_file", False) is True
response = openai_files_instance.create_file(
_is_async=_is_async,
api_base=api_base,
api_key=api_key,
timeout=timeout,
max_retries=optional_params.max_retries,
organization=organization,
create_file_data=_create_file_request,
)
else:
raise litellm.exceptions.BadRequestError(
message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format(
custom_llm_provider
),
model="n/a",
llm_provider=custom_llm_provider,
response=httpx.Response(
status_code=400,
content="Unsupported provider",
request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore
),
)
return response
except Exception as e:
raise e
async def afile_content(
file_id: str,
custom_llm_provider: Literal["openai"] = "openai",
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
) -> Coroutine[Any, Any, HttpxBinaryResponseContent]:
"""
Async: Get file contents
LiteLLM Equivalent of GET https://api.openai.com/v1/files/{file_id}/content
"""
try:
loop = asyncio.get_event_loop()
kwargs["afile_content"] = True
# Use a partial function to pass your keyword arguments
func = partial(
file_content,
file_id,
custom_llm_provider,
extra_headers,
extra_body,
**kwargs,
)
# Add the context to the function
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response # type: ignore
return response
except Exception as e:
raise e
def file_content(
file_id: str,
custom_llm_provider: Literal["openai"] = "openai",
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
) -> Union[HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]]:
"""
Returns the contents of the specified file.
LiteLLM Equivalent of GET https://api.openai.com/v1/files/{file_id}/content
"""
try:
optional_params = GenericLiteLLMParams(**kwargs)
if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = (
optional_params.api_base
or litellm.api_base
or os.getenv("OPENAI_API_BASE")
or "https://api.openai.com/v1"
)
organization = (
optional_params.organization
or litellm.organization
or os.getenv("OPENAI_ORGANIZATION", None)
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
)
# set API KEY
api_key = (
optional_params.api_key
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
or litellm.openai_key
or os.getenv("OPENAI_API_KEY")
)
### TIMEOUT LOGIC ###
timeout = (
optional_params.timeout or kwargs.get("request_timeout", 600) or 600
)
# set timeout for 10 minutes by default
if (
timeout is not None
and isinstance(timeout, httpx.Timeout)
and supports_httpx_timeout(custom_llm_provider) == False
):
read_timeout = timeout.read or 600
timeout = read_timeout # default 10 min timeout
elif timeout is not None and not isinstance(timeout, httpx.Timeout):
timeout = float(timeout) # type: ignore
elif timeout is None:
timeout = 600.0
_file_content_request = FileContentRequest(
file_id=file_id,
extra_headers=extra_headers,
extra_body=extra_body,
)
_is_async = kwargs.pop("afile_content", False) is True
response = openai_files_instance.file_content(
_is_async=_is_async,
file_content_request=_file_content_request,
api_base=api_base,
api_key=api_key,
timeout=timeout,
max_retries=optional_params.max_retries,
organization=organization,
)
else:
raise litellm.exceptions.BadRequestError(
message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format(
custom_llm_provider
),
model="n/a",
llm_provider=custom_llm_provider,
response=httpx.Response(
status_code=400,
content="Unsupported provider",
request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore
),
)
return response
except Exception as e:
raise e
async def acreate_batch(
completion_window: Literal["24h"],
endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"],
input_file_id: str,
custom_llm_provider: Literal["openai"] = "openai",
metadata: Optional[Dict[str, str]] = None,
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
) -> Coroutine[Any, Any, Batch]:
"""
Async: Creates and executes a batch from an uploaded file of requests
LiteLLM Equivalent of POST: https://api.openai.com/v1/batches
"""
try:
loop = asyncio.get_event_loop()
kwargs["acreate_batch"] = True
# Use a partial function to pass your keyword arguments
func = partial(
create_batch,
completion_window,
endpoint,
input_file_id,
custom_llm_provider,
metadata,
extra_headers,
extra_body,
**kwargs,
)
# Add the context to the function
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response # type: ignore
return response
except Exception as e:
raise e
def create_batch(
completion_window: Literal["24h"],
endpoint: Literal["/v1/chat/completions", "/v1/embeddings"],
input_file_id: str,
custom_llm_provider: Literal["openai"] = "openai",
metadata: Optional[Dict[str, str]] = None,
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
) -> Union[Batch, Coroutine[Any, Any, Batch]]:
"""
Creates and executes a batch from an uploaded file of requests
LiteLLM Equivalent of POST: https://api.openai.com/v1/batches
"""
try:
optional_params = GenericLiteLLMParams(**kwargs)
if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = (
optional_params.api_base
or litellm.api_base
or os.getenv("OPENAI_API_BASE")
or "https://api.openai.com/v1"
)
organization = (
optional_params.organization
or litellm.organization
or os.getenv("OPENAI_ORGANIZATION", None)
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
)
# set API KEY
api_key = (
optional_params.api_key
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
or litellm.openai_key
or os.getenv("OPENAI_API_KEY")
)
### TIMEOUT LOGIC ###
timeout = (
optional_params.timeout or kwargs.get("request_timeout", 600) or 600
)
# set timeout for 10 minutes by default
if (
timeout is not None
and isinstance(timeout, httpx.Timeout)
and supports_httpx_timeout(custom_llm_provider) == False
):
read_timeout = timeout.read or 600
timeout = read_timeout # default 10 min timeout
elif timeout is not None and not isinstance(timeout, httpx.Timeout):
timeout = float(timeout) # type: ignore
elif timeout is None:
timeout = 600.0
_is_async = kwargs.pop("acreate_batch", False) is True
_create_batch_request = CreateBatchRequest(
completion_window=completion_window,
endpoint=endpoint,
input_file_id=input_file_id,
metadata=metadata,
extra_headers=extra_headers,
extra_body=extra_body,
)
response = openai_batches_instance.create_batch(
api_base=api_base,
api_key=api_key,
organization=organization,
create_batch_data=_create_batch_request,
timeout=timeout,
max_retries=optional_params.max_retries,
_is_async=_is_async,
)
else:
raise litellm.exceptions.BadRequestError(
message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format(
custom_llm_provider
),
model="n/a",
llm_provider=custom_llm_provider,
response=httpx.Response(
status_code=400,
content="Unsupported provider",
request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore
),
)
return response
except Exception as e:
raise e
async def aretrieve_batch(
batch_id: str,
custom_llm_provider: Literal["openai"] = "openai",
metadata: Optional[Dict[str, str]] = None,
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
) -> Coroutine[Any, Any, Batch]:
"""
Async: Retrieves a batch.
LiteLLM Equivalent of GET https://api.openai.com/v1/batches/{batch_id}
"""
try:
loop = asyncio.get_event_loop()
kwargs["aretrieve_batch"] = True
# Use a partial function to pass your keyword arguments
func = partial(
retrieve_batch,
batch_id,
custom_llm_provider,
metadata,
extra_headers,
extra_body,
**kwargs,
)
# Add the context to the function
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response # type: ignore
return response
except Exception as e:
raise e
def retrieve_batch(
batch_id: str,
custom_llm_provider: Literal["openai"] = "openai",
metadata: Optional[Dict[str, str]] = None,
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
) -> Union[Batch, Coroutine[Any, Any, Batch]]:
"""
Retrieves a batch.
LiteLLM Equivalent of GET https://api.openai.com/v1/batches/{batch_id}
"""
try:
optional_params = GenericLiteLLMParams(**kwargs)
if custom_llm_provider == "openai":
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
api_base = (
optional_params.api_base
or litellm.api_base
or os.getenv("OPENAI_API_BASE")
or "https://api.openai.com/v1"
)
organization = (
optional_params.organization
or litellm.organization
or os.getenv("OPENAI_ORGANIZATION", None)
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
)
# set API KEY
api_key = (
optional_params.api_key
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
or litellm.openai_key
or os.getenv("OPENAI_API_KEY")
)
### TIMEOUT LOGIC ###
timeout = (
optional_params.timeout or kwargs.get("request_timeout", 600) or 600
)
# set timeout for 10 minutes by default
if (
timeout is not None
and isinstance(timeout, httpx.Timeout)
and supports_httpx_timeout(custom_llm_provider) == False
):
read_timeout = timeout.read or 600
timeout = read_timeout # default 10 min timeout
elif timeout is not None and not isinstance(timeout, httpx.Timeout):
timeout = float(timeout) # type: ignore
elif timeout is None:
timeout = 600.0
_retrieve_batch_request = RetrieveBatchRequest(
batch_id=batch_id,
extra_headers=extra_headers,
extra_body=extra_body,
)
_is_async = kwargs.pop("aretrieve_batch", False) is True
response = openai_batches_instance.retrieve_batch(
_is_async=_is_async,
retrieve_batch_data=_retrieve_batch_request,
api_base=api_base,
api_key=api_key,
organization=organization,
timeout=timeout,
max_retries=optional_params.max_retries,
)
else:
raise litellm.exceptions.BadRequestError(
message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format(
custom_llm_provider
),
model="n/a",
llm_provider=custom_llm_provider,
response=httpx.Response(
status_code=400,
content="Unsupported provider",
request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore
),
)
return response
except Exception as e:
raise e
def cancel_batch():
pass
def list_batch():
pass
async def acancel_batch():
pass
async def alist_batch():
pass
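A usage sketch for the new module, based on the signatures above; the `.jsonl` filename is illustrative and an `OPENAI_API_KEY` is assumed to be set in the environment:
```python
import asyncio
import litellm


async def main():
    # 1. upload a .jsonl file of requests for the Batch API
    file_obj = await litellm.acreate_file(
        file=open("batch_requests.jsonl", "rb"),
        purpose="batch",
        custom_llm_provider="openai",
    )

    # 2. create a batch against the uploaded file
    batch = await litellm.acreate_batch(
        completion_window="24h",
        endpoint="/v1/chat/completions",
        input_file_id=file_obj.id,
        custom_llm_provider="openai",
    )

    # 3. retrieve the batch to check its processing status
    retrieved = await litellm.aretrieve_batch(
        batch_id=batch.id, custom_llm_provider="openai"
    )
    print(retrieved.status)


asyncio.run(main())
```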


@ -314,6 +314,7 @@ class BudgetExceededError(Exception):
self.current_cost = current_cost
self.max_budget = max_budget
message = f"Budget has been exceeded! Current cost: {current_cost}, Max budget: {max_budget}"
self.message = message
super().__init__(message)


@ -455,8 +455,13 @@ class LangFuseLogger:
}
generation_name = clean_metadata.pop("generation_name", None)
if generation_name is None:
# just log `litellm-{call_type}` as the generation name
# if `generation_name` is None, use sensible default values
# If using litellm proxy, use `key_alias` if it is not None
# If `key_alias` is None, just log `litellm-{call_type}` as the generation name
_user_api_key_alias = clean_metadata.get("user_api_key_alias", None)
generation_name = f"litellm-{kwargs.get('call_type', 'completion')}"
if _user_api_key_alias is not None:
generation_name = f"litellm:{_user_api_key_alias}"
if response_obj is not None and "system_fingerprint" in response_obj:
system_fingerprint = response_obj.get("system_fingerprint", None)


@ -41,6 +41,7 @@ class ProviderRegionOutageModel(BaseOutageModel):
# we use this for the email header, please send a test email if you change this. verify it looks good on email
LITELLM_LOGO_URL = "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
LITELLM_SUPPORT_CONTACT = "support@berri.ai"
class LiteLLMBase(BaseModel):
@ -683,14 +684,16 @@ class SlackAlerting(CustomLogger):
event: Optional[
Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"]
] = None
event_group: Optional[Literal["user", "team", "key", "proxy"]] = None
event_group: Optional[
Literal["internal_user", "team", "key", "proxy", "customer"]
] = None
event_message: str = ""
webhook_event: Optional[WebhookEvent] = None
if type == "proxy_budget":
event_group = "proxy"
event_message += "Proxy Budget: "
elif type == "user_budget":
event_group = "user"
event_group = "internal_user"
event_message += "User Budget: "
_id = user_info.user_id or _id
elif type == "team_budget":
@ -754,6 +757,36 @@ class SlackAlerting(CustomLogger):
return
return
async def customer_spend_alert(
self,
token: Optional[str],
key_alias: Optional[str],
end_user_id: Optional[str],
response_cost: Optional[float],
max_budget: Optional[float],
):
if end_user_id is not None and token is not None and response_cost is not None:
# log customer spend
event = WebhookEvent(
spend=response_cost,
max_budget=max_budget,
token=token,
customer_id=end_user_id,
user_id=None,
team_id=None,
user_email=None,
key_alias=key_alias,
projected_exceeded_date=None,
projected_spend=None,
event="spend_tracked",
event_group="customer",
event_message="Customer spend tracked. Customer={}, spend={}".format(
end_user_id, response_cost
),
)
await self.send_webhook_alert(webhook_event=event)
def _count_outage_alerts(self, alerts: List[int]) -> str:
"""
Parameters:
@ -1171,6 +1204,10 @@ Model Info:
await self._check_if_using_premium_email_feature(
premium_user, email_logo_url, email_support_contact
)
if email_logo_url is None:
email_logo_url = LITELLM_LOGO_URL
if email_support_contact is None:
email_support_contact = LITELLM_SUPPORT_CONTACT
event_name = webhook_event.event_message
recipient_email = webhook_event.user_email
@ -1271,6 +1308,11 @@ Model Info:
premium_user, email_logo_url, email_support_contact
)
if email_logo_url is None:
email_logo_url = LITELLM_LOGO_URL
if email_support_contact is None:
email_support_contact = LITELLM_SUPPORT_CONTACT
event_name = webhook_event.event_message
recipient_email = webhook_event.user_email
user_name = webhook_event.user_id
@ -1401,7 +1443,9 @@ Model Info:
if response.status_code == 200:
pass
else:
print("Error sending slack alert. Error=", response.text) # noqa
verbose_proxy_logger.debug(
"Error sending slack alert. Error=", response.text
)
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
"""Log deployment latency"""
@ -1421,6 +1465,8 @@ Model Info:
final_value = float(
response_s.total_seconds() / completion_tokens
)
if isinstance(final_value, timedelta):
final_value = final_value.total_seconds()
await self.async_update_daily_reports(
DeploymentMetrics(


@ -1,114 +1,153 @@
import traceback
from litellm._logging import verbose_logger
import litellm
class TraceloopLogger:
def __init__(self):
from traceloop.sdk.tracing.tracing import TracerWrapper
from traceloop.sdk import Traceloop
try:
from traceloop.sdk.tracing.tracing import TracerWrapper
from traceloop.sdk import Traceloop
from traceloop.sdk.instruments import Instruments
except ModuleNotFoundError as e:
verbose_logger.error(
f"Traceloop not installed, try running 'pip install traceloop-sdk' to fix this error: {e}\n{traceback.format_exc()}"
)
Traceloop.init(app_name="Litellm-Server", disable_batch=True)
Traceloop.init(
app_name="Litellm-Server",
disable_batch=True,
instruments=[
Instruments.CHROMA,
Instruments.PINECONE,
Instruments.WEAVIATE,
Instruments.LLAMA_INDEX,
Instruments.LANGCHAIN,
],
)
self.tracer_wrapper = TracerWrapper()
def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
from opentelemetry.trace import SpanKind
def log_event(
self,
kwargs,
response_obj,
start_time,
end_time,
user_id,
print_verbose,
level="DEFAULT",
status_message=None,
):
from opentelemetry import trace
from opentelemetry.trace import SpanKind, Status, StatusCode
from opentelemetry.semconv.ai import SpanAttributes
try:
print_verbose(
f"Traceloop Logging - Enters logging function for model {kwargs}"
)
tracer = self.tracer_wrapper.get_tracer()
model = kwargs.get("model")
# LiteLLM uses the standard OpenAI library, so it's already handled by Traceloop SDK
if kwargs.get("litellm_params").get("custom_llm_provider") == "openai":
return
optional_params = kwargs.get("optional_params", {})
with tracer.start_as_current_span(
"litellm.completion",
kind=SpanKind.CLIENT,
) as span:
if span.is_recording():
span = tracer.start_span(
"litellm.completion", kind=SpanKind.CLIENT, start_time=start_time
)
if span.is_recording():
span.set_attribute(
SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model")
)
if "stop" in optional_params:
span.set_attribute(
SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model")
SpanAttributes.LLM_CHAT_STOP_SEQUENCES,
optional_params.get("stop"),
)
if "stop" in optional_params:
span.set_attribute(
SpanAttributes.LLM_CHAT_STOP_SEQUENCES,
optional_params.get("stop"),
)
if "frequency_penalty" in optional_params:
span.set_attribute(
SpanAttributes.LLM_FREQUENCY_PENALTY,
optional_params.get("frequency_penalty"),
)
if "presence_penalty" in optional_params:
span.set_attribute(
SpanAttributes.LLM_PRESENCE_PENALTY,
optional_params.get("presence_penalty"),
)
if "top_p" in optional_params:
span.set_attribute(
SpanAttributes.LLM_TOP_P, optional_params.get("top_p")
)
if "tools" in optional_params or "functions" in optional_params:
span.set_attribute(
SpanAttributes.LLM_REQUEST_FUNCTIONS,
optional_params.get(
"tools", optional_params.get("functions")
),
)
if "user" in optional_params:
span.set_attribute(
SpanAttributes.LLM_USER, optional_params.get("user")
)
if "max_tokens" in optional_params:
span.set_attribute(
SpanAttributes.LLM_REQUEST_MAX_TOKENS,
kwargs.get("max_tokens"),
)
if "temperature" in optional_params:
span.set_attribute(
SpanAttributes.LLM_TEMPERATURE, kwargs.get("temperature")
)
for idx, prompt in enumerate(kwargs.get("messages")):
span.set_attribute(
f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
prompt.get("role"),
)
span.set_attribute(
f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
prompt.get("content"),
)
if "frequency_penalty" in optional_params:
span.set_attribute(
SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model")
SpanAttributes.LLM_FREQUENCY_PENALTY,
optional_params.get("frequency_penalty"),
)
if "presence_penalty" in optional_params:
span.set_attribute(
SpanAttributes.LLM_PRESENCE_PENALTY,
optional_params.get("presence_penalty"),
)
if "top_p" in optional_params:
span.set_attribute(
SpanAttributes.LLM_TOP_P, optional_params.get("top_p")
)
if "tools" in optional_params or "functions" in optional_params:
span.set_attribute(
SpanAttributes.LLM_REQUEST_FUNCTIONS,
optional_params.get("tools", optional_params.get("functions")),
)
if "user" in optional_params:
span.set_attribute(
SpanAttributes.LLM_USER, optional_params.get("user")
)
if "max_tokens" in optional_params:
span.set_attribute(
SpanAttributes.LLM_REQUEST_MAX_TOKENS,
kwargs.get("max_tokens"),
)
if "temperature" in optional_params:
span.set_attribute(
SpanAttributes.LLM_REQUEST_TEMPERATURE,
kwargs.get("temperature"),
)
usage = response_obj.get("usage")
if usage:
span.set_attribute(
SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
usage.get("total_tokens"),
)
span.set_attribute(
SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
usage.get("completion_tokens"),
)
span.set_attribute(
SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
usage.get("prompt_tokens"),
)
for idx, choice in enumerate(response_obj.get("choices")):
span.set_attribute(
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
choice.get("finish_reason"),
)
span.set_attribute(
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
choice.get("message").get("role"),
)
span.set_attribute(
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
choice.get("message").get("content"),
)
for idx, prompt in enumerate(kwargs.get("messages")):
span.set_attribute(
f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
prompt.get("role"),
)
span.set_attribute(
f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
prompt.get("content"),
)
span.set_attribute(
SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model")
)
usage = response_obj.get("usage")
if usage:
span.set_attribute(
SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
usage.get("total_tokens"),
)
span.set_attribute(
SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
usage.get("completion_tokens"),
)
span.set_attribute(
SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
usage.get("prompt_tokens"),
)
for idx, choice in enumerate(response_obj.get("choices")):
span.set_attribute(
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
choice.get("finish_reason"),
)
span.set_attribute(
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
choice.get("message").get("role"),
)
span.set_attribute(
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
choice.get("message").get("content"),
)
if (
level == "ERROR"
and status_message is not None
and isinstance(status_message, str)
):
span.record_exception(Exception(status_message))
span.set_status(Status(StatusCode.ERROR, status_message))
span.end(end_time)
except Exception as e:
print_verbose(f"Traceloop Layer Error - {e}")


@ -379,13 +379,12 @@ class AnthropicChatCompletion(BaseLLM):
logger_fn=None,
headers={},
):
self.async_handler = AsyncHTTPHandler(
timeout=httpx.Timeout(timeout=600.0, connect=5.0)
async_handler = AsyncHTTPHandler(
timeout=httpx.Timeout(timeout=600.0, connect=20.0)
)
data["stream"] = True
response = await self.async_handler.post(
api_base, headers=headers, data=json.dumps(data), stream=True
)
response = await async_handler.post(api_base, headers=headers, json=data)
if response.status_code != 200:
raise AnthropicError(
@ -421,12 +420,10 @@ class AnthropicChatCompletion(BaseLLM):
logger_fn=None,
headers={},
) -> Union[ModelResponse, CustomStreamWrapper]:
self.async_handler = AsyncHTTPHandler(
async_handler = AsyncHTTPHandler(
timeout=httpx.Timeout(timeout=600.0, connect=5.0)
)
response = await self.async_handler.post(
api_base, headers=headers, data=json.dumps(data)
)
response = await async_handler.post(api_base, headers=headers, json=data)
if stream and _is_function_call:
return self.process_streaming_response(
model=model,


@ -43,12 +43,13 @@ class AsyncHTTPHandler:
self,
url: str,
data: Optional[Union[dict, str]] = None, # type: ignore
json: Optional[dict] = None,
params: Optional[dict] = None,
headers: Optional[dict] = None,
stream: bool = False,
):
req = self.client.build_request(
"POST", url, data=data, params=params, headers=headers # type: ignore
"POST", url, data=data, json=json, params=params, headers=headers # type: ignore
)
response = await self.client.send(req, stream=stream)
return response
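A sketch of the new `json=` parameter, assuming the handler lives at `litellm.llms.custom_httpx.http_handler` as in the upstream repo; the URL is illustrative:
```python
import asyncio

import httpx
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler


async def main():
    handler = AsyncHTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=20.0))
    # pass a dict directly instead of data=json.dumps(...)
    resp = await handler.post(
        "https://httpbin.org/post",
        json={"hello": "world"},
        headers={"Content-Type": "application/json"},
    )
    print(resp.status_code)


asyncio.run(main())
```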


@ -45,6 +45,8 @@ class OllamaConfig:
- `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7
- `seed` (int): Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. Example usage: seed 42
- `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"
- `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1
@ -69,6 +71,7 @@ class OllamaConfig:
repeat_last_n: Optional[int] = None
repeat_penalty: Optional[float] = None
temperature: Optional[float] = None
seed: Optional[int] = None
stop: Optional[list] = (
None # stop is a list based on this - https://github.com/ollama/ollama/pull/442
)
@ -90,6 +93,7 @@ class OllamaConfig:
repeat_last_n: Optional[int] = None,
repeat_penalty: Optional[float] = None,
temperature: Optional[float] = None,
seed: Optional[int] = None,
stop: Optional[list] = None,
tfs_z: Optional[float] = None,
num_predict: Optional[int] = None,
@ -120,6 +124,44 @@ class OllamaConfig:
)
and v is not None
}
def get_supported_openai_params(
self,
):
return [
"max_tokens",
"stream",
"top_p",
"temperature",
"seed",
"frequency_penalty",
"stop",
"response_format",
]
# ollama wants plain base64 jpeg/png files as images. strip any leading dataURI
# and convert to jpeg if necessary.
def _convert_image(image):
import base64, io
try:
from PIL import Image
except:
raise Exception(
"ollama image conversion failed please run `pip install Pillow`"
)
orig = image
if image.startswith("data:"):
image = image.split(",")[-1]
try:
image_data = Image.open(io.BytesIO(base64.b64decode(image)))
if image_data.format in ["JPEG", "PNG"]:
return image
except:
return orig
jpeg_image = io.BytesIO()
image_data.convert("RGB").save(jpeg_image, "JPEG")
jpeg_image.seek(0)
return base64.b64encode(jpeg_image.getvalue()).decode("utf-8")
# ollama implementation
@ -158,7 +200,7 @@ def get_ollama_response(
if format is not None:
data["format"] = format
if images is not None:
data["images"] = images
data["images"] = [_convert_image(image) for image in images]
## LOGGING
logging_obj.pre_call(


@ -45,6 +45,8 @@ class OllamaChatConfig:
- `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7
- `seed` (int): Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. Example usage: seed 42
- `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"
- `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1
@ -69,6 +71,7 @@ class OllamaChatConfig:
repeat_last_n: Optional[int] = None
repeat_penalty: Optional[float] = None
temperature: Optional[float] = None
seed: Optional[int] = None
stop: Optional[list] = (
None # stop is a list based on this - https://github.com/ollama/ollama/pull/442
)
@ -90,6 +93,7 @@ class OllamaChatConfig:
repeat_last_n: Optional[int] = None,
repeat_penalty: Optional[float] = None,
temperature: Optional[float] = None,
seed: Optional[int] = None,
stop: Optional[list] = None,
tfs_z: Optional[float] = None,
num_predict: Optional[int] = None,
@ -130,6 +134,7 @@ class OllamaChatConfig:
"stream",
"top_p",
"temperature",
"seed",
"frequency_penalty",
"stop",
"tools",
@ -146,6 +151,8 @@ class OllamaChatConfig:
optional_params["stream"] = value
if param == "temperature":
optional_params["temperature"] = value
if param == "seed":
optional_params["seed"] = value
if param == "top_p":
optional_params["top_p"] = value
if param == "frequency_penalty":


@ -21,11 +21,12 @@ from litellm.utils import (
TranscriptionResponse,
TextCompletionResponse,
)
from typing import Callable, Optional
from typing import Callable, Optional, Coroutine
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
from openai import OpenAI, AsyncOpenAI
from ..types.llms.openai import *
import openai
class OpenAIError(Exception):
@ -349,7 +350,6 @@ class OpenAIConfig:
"top_p",
"tools",
"tool_choice",
"user",
"function_call",
"functions",
"max_retries",
@ -362,6 +362,12 @@ class OpenAIConfig:
): # gpt-4 does not support 'response_format'
model_specific_params.append("response_format")
if (
model in litellm.open_ai_chat_completion_models
) or model in litellm.open_ai_text_completion_models:
model_specific_params.append(
"user"
) # user is not a param supported by all openai-compatible endpoints - e.g. azure ai
return base_params + model_specific_params
def map_openai_params(
@ -1085,8 +1091,8 @@ class OpenAIChatCompletion(BaseLLM):
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
api_key: Optional[str],
api_base: Optional[str],
client=None,
logging_obj=None,
atranscription: bool = False,
@ -1142,7 +1148,6 @@ class OpenAIChatCompletion(BaseLLM):
max_retries=None,
logging_obj=None,
):
response = None
try:
if client is None:
openai_aclient = AsyncOpenAI(
@ -1176,6 +1181,95 @@ class OpenAIChatCompletion(BaseLLM):
)
raise e
def audio_speech(
self,
model: str,
input: str,
voice: str,
optional_params: dict,
api_key: Optional[str],
api_base: Optional[str],
organization: Optional[str],
project: Optional[str],
max_retries: int,
timeout: Union[float, httpx.Timeout],
aspeech: Optional[bool] = None,
client=None,
) -> HttpxBinaryResponseContent:
if aspeech is not None and aspeech == True:
return self.async_audio_speech(
model=model,
input=input,
voice=voice,
optional_params=optional_params,
api_key=api_key,
api_base=api_base,
organization=organization,
project=project,
max_retries=max_retries,
timeout=timeout,
client=client,
) # type: ignore
if client is None:
openai_client = OpenAI(
api_key=api_key,
base_url=api_base,
organization=organization,
project=project,
http_client=litellm.client_session,
timeout=timeout,
max_retries=max_retries,
)
else:
openai_client = client
response = openai_client.audio.speech.create(
model=model,
voice=voice, # type: ignore
input=input,
**optional_params,
)
return response
async def async_audio_speech(
self,
model: str,
input: str,
voice: str,
optional_params: dict,
api_key: Optional[str],
api_base: Optional[str],
organization: Optional[str],
project: Optional[str],
max_retries: int,
timeout: Union[float, httpx.Timeout],
client=None,
) -> HttpxBinaryResponseContent:
if client is None:
openai_client = AsyncOpenAI(
api_key=api_key,
base_url=api_base,
organization=organization,
project=project,
http_client=litellm.aclient_session,
timeout=timeout,
max_retries=max_retries,
)
else:
openai_client = client
response = await openai_client.audio.speech.create(
model=model,
voice=voice, # type: ignore
input=input,
**optional_params,
)
return response
async def ahealth_check(
self,
model: Optional[str],
@ -1497,6 +1591,322 @@ class OpenAITextCompletion(BaseLLM):
yield transformed_chunk
class OpenAIFilesAPI(BaseLLM):
"""
OpenAI methods to support files for batches
- create_file()
- retrieve_file()
- list_files()
- delete_file()
- file_content()
- update_file()
"""
def __init__(self) -> None:
super().__init__()
def get_openai_client(
self,
api_key: Optional[str],
api_base: Optional[str],
timeout: Union[float, httpx.Timeout],
max_retries: Optional[int],
organization: Optional[str],
client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
_is_async: bool = False,
) -> Optional[Union[OpenAI, AsyncOpenAI]]:
received_args = locals()
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = None
if client is None:
data = {}
for k, v in received_args.items():
if k == "self" or k == "client" or k == "_is_async":
pass
elif k == "api_base" and v is not None:
data["base_url"] = v
elif v is not None:
data[k] = v
if _is_async is True:
openai_client = AsyncOpenAI(**data)
else:
openai_client = OpenAI(**data) # type: ignore
else:
openai_client = client
return openai_client
async def acreate_file(
self,
create_file_data: CreateFileRequest,
openai_client: AsyncOpenAI,
) -> FileObject:
response = await openai_client.files.create(**create_file_data)
return response
def create_file(
self,
_is_async: bool,
create_file_data: CreateFileRequest,
api_base: str,
api_key: Optional[str],
timeout: Union[float, httpx.Timeout],
max_retries: Optional[int],
organization: Optional[str],
client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
) -> Union[FileObject, Coroutine[Any, Any, FileObject]]:
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
client=client,
_is_async=_is_async,
)
if openai_client is None:
raise ValueError(
"OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
)
if _is_async is True:
if not isinstance(openai_client, AsyncOpenAI):
raise ValueError(
"OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
)
return self.acreate_file( # type: ignore
create_file_data=create_file_data, openai_client=openai_client
)
response = openai_client.files.create(**create_file_data)
return response
async def afile_content(
self,
file_content_request: FileContentRequest,
openai_client: AsyncOpenAI,
) -> HttpxBinaryResponseContent:
response = await openai_client.files.content(**file_content_request)
return response
def file_content(
self,
_is_async: bool,
file_content_request: FileContentRequest,
api_base: str,
api_key: Optional[str],
timeout: Union[float, httpx.Timeout],
max_retries: Optional[int],
organization: Optional[str],
client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
) -> Union[
HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]
]:
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
client=client,
_is_async=_is_async,
)
if openai_client is None:
raise ValueError(
"OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
)
if _is_async is True:
if not isinstance(openai_client, AsyncOpenAI):
raise ValueError(
"OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
)
return self.afile_content( # type: ignore
file_content_request=file_content_request,
openai_client=openai_client,
)
response = openai_client.files.content(**file_content_request)
return response
class OpenAIBatchesAPI(BaseLLM):
"""
OpenAI methods to support batches
- create_batch()
- retrieve_batch()
- cancel_batch()
- list_batch()
"""
def __init__(self) -> None:
super().__init__()
def get_openai_client(
self,
api_key: Optional[str],
api_base: Optional[str],
timeout: Union[float, httpx.Timeout],
max_retries: Optional[int],
organization: Optional[str],
client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
_is_async: bool = False,
) -> Optional[Union[OpenAI, AsyncOpenAI]]:
received_args = locals()
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = None
if client is None:
data = {}
for k, v in received_args.items():
if k == "self" or k == "client" or k == "_is_async":
pass
elif k == "api_base" and v is not None:
data["base_url"] = v
elif v is not None:
data[k] = v
if _is_async is True:
openai_client = AsyncOpenAI(**data)
else:
openai_client = OpenAI(**data) # type: ignore
else:
openai_client = client
return openai_client
async def acreate_batch(
self,
create_batch_data: CreateBatchRequest,
openai_client: AsyncOpenAI,
) -> Batch:
response = await openai_client.batches.create(**create_batch_data)
return response
def create_batch(
self,
_is_async: bool,
create_batch_data: CreateBatchRequest,
api_key: Optional[str],
api_base: Optional[str],
timeout: Union[float, httpx.Timeout],
max_retries: Optional[int],
organization: Optional[str],
client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
) -> Union[Batch, Coroutine[Any, Any, Batch]]:
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
client=client,
_is_async=_is_async,
)
if openai_client is None:
raise ValueError(
"OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
)
if _is_async is True:
if not isinstance(openai_client, AsyncOpenAI):
raise ValueError(
"OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
)
return self.acreate_batch( # type: ignore
create_batch_data=create_batch_data, openai_client=openai_client
)
response = openai_client.batches.create(**create_batch_data)
return response
async def aretrieve_batch(
self,
retrieve_batch_data: RetrieveBatchRequest,
openai_client: AsyncOpenAI,
) -> Batch:
response = await openai_client.batches.retrieve(**retrieve_batch_data)
return response
def retrieve_batch(
self,
_is_async: bool,
retrieve_batch_data: RetrieveBatchRequest,
api_key: Optional[str],
api_base: Optional[str],
timeout: Union[float, httpx.Timeout],
max_retries: Optional[int],
organization: Optional[str],
client: Optional[OpenAI] = None,
):
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
client=client,
_is_async=_is_async,
)
if openai_client is None:
raise ValueError(
"OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
)
if _is_async is True:
if not isinstance(openai_client, AsyncOpenAI):
raise ValueError(
"OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
)
return self.aretrieve_batch( # type: ignore
retrieve_batch_data=retrieve_batch_data, openai_client=openai_client
)
response = openai_client.batches.retrieve(**retrieve_batch_data)
return response
def cancel_batch(
self,
_is_async: bool,
cancel_batch_data: CancelBatchRequest,
api_key: Optional[str],
api_base: Optional[str],
timeout: Union[float, httpx.Timeout],
max_retries: Optional[int],
organization: Optional[str],
client: Optional[OpenAI] = None,
):
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
api_key=api_key,
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
client=client,
_is_async=_is_async,
)
if openai_client is None:
raise ValueError(
"OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
)
response = openai_client.batches.cancel(**cancel_batch_data)
return response
# def list_batch(
# self,
# list_batch_data: ListBatchRequest,
# api_key: Optional[str],
# api_base: Optional[str],
# timeout: Union[float, httpx.Timeout],
# max_retries: Optional[int],
# organization: Optional[str],
# client: Optional[OpenAI] = None,
# ):
# openai_client: OpenAI = self.get_openai_client(
# api_key=api_key,
# api_base=api_base,
# timeout=timeout,
# max_retries=max_retries,
# organization=organization,
# client=client,
# )
# response = openai_client.batches.list(**list_batch_data)
# return response
class OpenAIAssistantsAPI(BaseLLM):
def __init__(self) -> None:
super().__init__()

View file

@ -14,7 +14,6 @@ from functools import partial
import dotenv, traceback, random, asyncio, time, contextvars
from copy import deepcopy
import httpx
import litellm
from ._logging import verbose_logger
from litellm import ( # type: ignore
@ -92,6 +91,7 @@ import tiktoken
from concurrent.futures import ThreadPoolExecutor
from typing import Callable, List, Optional, Dict, Union, Mapping
from .caching import enable_cache, disable_cache, update_cache
from .types.llms.openai import HttpxBinaryResponseContent
encoding = tiktoken.get_encoding("cl100k_base")
from litellm.utils import (
@ -680,6 +680,7 @@ def completion(
"region_name",
"allowed_model_region",
"model_config",
"fastest_response",
]
default_params = openai_params + litellm_params
@ -4130,6 +4131,24 @@ def transcription(
max_retries=max_retries,
)
elif custom_llm_provider == "openai":
api_base = (
api_base
or litellm.api_base
or get_secret("OPENAI_API_BASE")
or "https://api.openai.com/v1"
) # type: ignore
openai.organization = (
litellm.organization
or get_secret("OPENAI_ORGANIZATION")
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
)
# set API KEY
api_key = (
api_key
or litellm.api_key
or litellm.openai_key
or get_secret("OPENAI_API_KEY")
) # type: ignore
response = openai_chat_completions.audio_transcriptions(
model=model,
audio_file=file,
@ -4139,6 +4158,139 @@ def transcription(
timeout=timeout,
logging_obj=litellm_logging_obj,
max_retries=max_retries,
api_base=api_base,
api_key=api_key,
)
return response
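A minimal sketch of driving this transcription path from the SDK, assuming `OPENAI_API_KEY` is set; `audio.mp3` is a placeholder file:

```python
import litellm

# Hypothetical usage sketch; "audio.mp3" is a placeholder path and OPENAI_API_KEY is read from the env.
with open("audio.mp3", "rb") as audio_file:
    transcript = litellm.transcription(
        model="whisper-1",  # resolved to the OpenAI provider
        file=audio_file,
    )
print(transcript.text)
```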
@client
async def aspeech(*args, **kwargs) -> HttpxBinaryResponseContent:
"""
Calls OpenAI TTS endpoints.
"""
loop = asyncio.get_event_loop()
model = args[0] if len(args) > 0 else kwargs["model"]
### PASS ARGS TO SPEECH ###
kwargs["aspeech"] = True
custom_llm_provider = kwargs.get("custom_llm_provider", None)
try:
# Use a partial function to pass your keyword arguments
func = partial(speech, *args, **kwargs)
# Add the context to the function
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
_, custom_llm_provider, _, _ = get_llm_provider(
model=model, api_base=kwargs.get("api_base", None)
)
# Await normally
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
# Call the synchronous function using run_in_executor
response = await loop.run_in_executor(None, func_with_context)
return response # type: ignore
except Exception as e:
custom_llm_provider = custom_llm_provider or "openai"
raise exception_type(
model=model,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=args,
extra_kwargs=kwargs,
)
@client
def speech(
model: str,
input: str,
voice: str,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
organization: Optional[str] = None,
project: Optional[str] = None,
max_retries: Optional[int] = None,
metadata: Optional[dict] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
response_format: Optional[str] = None,
speed: Optional[int] = None,
client=None,
headers: Optional[dict] = None,
custom_llm_provider: Optional[str] = None,
aspeech: Optional[bool] = None,
**kwargs,
) -> HttpxBinaryResponseContent:
model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base) # type: ignore
optional_params = {}
if response_format is not None:
optional_params["response_format"] = response_format
if speed is not None:
optional_params["speed"] = speed # type: ignore
if timeout is None:
timeout = litellm.request_timeout
if max_retries is None:
max_retries = litellm.num_retries or openai.DEFAULT_MAX_RETRIES
response: Optional[HttpxBinaryResponseContent] = None
if custom_llm_provider == "openai":
api_base = (
api_base # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
or litellm.api_base
or get_secret("OPENAI_API_BASE")
or "https://api.openai.com/v1"
) # type: ignore
# set API KEY
api_key = (
api_key
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
or litellm.openai_key
or get_secret("OPENAI_API_KEY")
) # type: ignore
organization = (
organization
or litellm.organization
or get_secret("OPENAI_ORGANIZATION")
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
) # type: ignore
project = (
project
or litellm.project
or get_secret("OPENAI_PROJECT")
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
) # type: ignore
headers = headers or litellm.headers
response = openai_chat_completions.audio_speech(
model=model,
input=input,
voice=voice,
optional_params=optional_params,
api_key=api_key,
api_base=api_base,
organization=organization,
project=project,
max_retries=max_retries,
timeout=timeout,
client=client, # pass AsyncOpenAI, OpenAI client
aspeech=aspeech,
)
if response is None:
raise Exception(
"Unable to map the custom llm provider={} to a known provider={}.".format(
custom_llm_provider, litellm.provider_list
)
)
return response
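A minimal usage sketch for the new `speech()` entrypoint, assuming `OPENAI_API_KEY` is set; `speech.mp3` is just an example output path:

```python
import litellm

# Minimal sketch of the sync entrypoint defined above (placeholder output path).
response = litellm.speech(
    model="openai/tts-1",
    voice="alloy",
    input="the quick brown fox jumped over the lazy dogs",
)
response.stream_to_file("speech.mp3")  # HttpxBinaryResponseContent supports writing to disk
```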

View file

@ -1265,8 +1265,8 @@
"max_tokens": 4096,
"max_input_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.0000075,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000075,
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[39712,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","608","static/chunks/608-d128caa3cfe973c1.js","931","static/chunks/app/page-76d278f96a0e9768.js"],""]
3:I[45014,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","359","static/chunks/359-f105a7fb61fe8110.js","440","static/chunks/440-b9a05f116e1a696d.js","134","static/chunks/134-4a7b43f992182f2c.js","931","static/chunks/app/page-f610596e5fb3cce4.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["D_ZUmMtLMPSa4aQQUJtKt",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5d93d4a9fa59d72f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["PcGFjo5-03lHREJ3E0k6y",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/33354d8285fe572e.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,7 @@
2:I[77831,[],""]
3:I[87494,["359","static/chunks/359-f105a7fb61fe8110.js","134","static/chunks/134-4a7b43f992182f2c.js","418","static/chunks/app/model_hub/page-aa3c10cf9bb31255.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["PcGFjo5-03lHREJ3E0k6y",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/33354d8285fe572e.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -1,42 +1,16 @@
general_settings:
alert_to_webhook_url:
budget_alerts: https://hooks.slack.com/services/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
daily_reports: https://hooks.slack.com/services/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
db_exceptions: https://hooks.slack.com/services/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
llm_exceptions: https://hooks.slack.com/services/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
llm_requests_hanging: https://hooks.slack.com/services/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
llm_too_slow: https://hooks.slack.com/services/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
outage_alerts: https://hooks.slack.com/services/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
alert_types:
- llm_exceptions
- llm_too_slow
- llm_requests_hanging
- budget_alerts
- db_exceptions
- daily_reports
- spend_reports
- cooldown_deployment
- new_model_added
- outage_alerts
alerting:
- slack
database_connection_pool_limit: 100
database_connection_timeout: 60
health_check_interval: 300
ui_access_mode: all
# litellm_settings:
# json_logs: true
model_list:
- litellm_params:
api_base: http://0.0.0.0:8080
api_key: ''
model: openai/my-fake-model
rpm: 800
model_name: gpt-3.5-turbo-fake-model
- litellm_params:
api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
api_key: os.environ/AZURE_EUROPE_API_KEY
model: azure/gpt-35-turbo
model_name: gpt-3.5-turbo
rpm: 10
model_name: gpt-3.5-turbo-fake-model
- litellm_params:
api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
api_key: os.environ/AZURE_API_KEY
@ -52,5 +26,8 @@ model_list:
api_version: '2023-05-15'
model: azure/chatgpt-v-2
model_name: gpt-3.5-turbo
- model_name: tts
litellm_params:
model: openai/tts-1
router_settings:
enable_pre_call_checks: true
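With the `tts` entry above, a client can hit the proxy's OpenAI-compatible audio route; a sketch assuming a local proxy at a placeholder address and key:

```python
import openai

# Hypothetical client sketch against a locally running proxy (placeholder URL and key),
# assuming the proxy exposes the OpenAI-compatible /audio/speech route for this model.
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.audio.speech.create(
    model="tts",  # the model_name defined in the config above
    voice="alloy",
    input="a short test sentence",
)
response.stream_to_file("speech.mp3")
```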

View file

@ -1,4 +1,4 @@
from pydantic import BaseModel, Extra, Field, root_validator, Json, validator
from pydantic import BaseModel, Extra, Field, model_validator, Json, ConfigDict
from dataclasses import fields
import enum
from typing import Optional, List, Union, Dict, Literal, Any
@ -7,6 +7,75 @@ import uuid, json, sys, os
from litellm.types.router import UpdateRouterConfig
from litellm.types.utils import ProviderField
class LitellmUserRoles(str, enum.Enum):
"""
Admin Roles:
PROXY_ADMIN: admin over the platform
PROXY_ADMIN_VIEW_ONLY: can login, view all own keys, view all spend
Internal User Roles:
INTERNAL_USER: can login, view/create/delete their own keys, view their spend
INTERNAL_USER_VIEW_ONLY: can login, view their own keys, view their own spend
Team Roles:
TEAM: used for JWT auth
Customer Roles:
CUSTOMER: External users -> these are customers
"""
# Admin Roles
PROXY_ADMIN = "proxy_admin"
PROXY_ADMIN_VIEW_ONLY = "proxy_admin_viewer"
# Internal User Roles
INTERNAL_USER = "internal_user"
INTERNAL_USER_VIEW_ONLY = "internal_user_viewer"
# Team Roles
TEAM = "team"
# Customer Roles - External users of proxy
CUSTOMER = "customer"
def __str__(self):
return str(self.value)
@property
def description(self):
"""
Descriptions for the enum values
"""
descriptions = {
"proxy_admin": "admin over litellm proxy, has all permissions",
"proxy_admin_viewer": "view all keys, view all spend",
"internal_user": "view/create/delete their own keys, view their own spend",
"internal_user_viewer": "view their own keys, view their own spend",
"team": "team scope used for JWT auth",
"customer": "customer",
}
return descriptions.get(self.value, "")
@property
def ui_label(self):
"""
UI labels for the enum values
"""
ui_labels = {
"proxy_admin": "Admin (All Permissions)",
"proxy_admin_viewer": "Admin (View Only)",
"internal_user": "Internal User (Create/Delete/View)",
"internal_user_viewer": "Internal User (View Only)",
"team": "Team",
"customer": "Customer",
}
return ui_labels.get(self.value, "")
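A small sketch of how the str-valued enum above is consumed; the import path mirrors the `litellm.proxy._types` imports used elsewhere in this diff:

```python
from litellm.proxy._types import LitellmUserRoles

# The enum mixes in `str`, so members compare equal to their raw role strings.
role = LitellmUserRoles.PROXY_ADMIN
assert role == "proxy_admin"

print(role.ui_label)                               # "Admin (All Permissions)"
print(LitellmUserRoles.INTERNAL_USER.description)  # "view/create/delete their own keys, view their own spend"
```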
AlertType = Literal[
"llm_exceptions",
"llm_too_slow",
@ -50,8 +119,7 @@ class LiteLLMBase(BaseModel):
# if using pydantic v1
return self.__fields_set__
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class LiteLLM_UpperboundKeyGenerateParams(LiteLLMBase):
@ -99,6 +167,14 @@ class LiteLLMRoutes(enum.Enum):
# moderations
"/moderations",
"/v1/moderations",
# batches
"/v1/batches",
"/batches",
"/v1/batches{batch_id}",
"/batches{batch_id}",
# files
"/v1/files",
"/files",
# models
"/models",
"/v1/models",
@ -272,7 +348,8 @@ class LiteLLMPromptInjectionParams(LiteLLMBase):
description="Return rejected request error message as a string to the user. Default behaviour is to raise an exception.",
)
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def check_llm_api_params(cls, values):
llm_api_check = values.get("llm_api_check")
if llm_api_check is True:
@ -330,8 +407,7 @@ class ProxyChatCompletionRequest(LiteLLMBase):
deployment_id: Optional[str] = None
request_timeout: Optional[int] = None
class Config:
extra = "allow" # allow params not defined here, these fall in litellm.completion(**kwargs)
model_config = ConfigDict(extra="allow") # allow params not defined here, these fall in litellm.completion(**kwargs)
class ModelInfoDelete(LiteLLMBase):
@ -358,11 +434,10 @@ class ModelInfo(LiteLLMBase):
]
]
class Config:
extra = Extra.allow # Allow extra fields
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=(), extra="allow")
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def set_model_info(cls, values):
if values.get("id") is None:
values.update({"id": str(uuid.uuid4())})
@ -393,10 +468,10 @@ class ModelParams(LiteLLMBase):
litellm_params: dict
model_info: ModelInfo
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def set_model_info(cls, values):
if values.get("model_info") is None:
values.update({"model_info": ModelInfo()})
@ -432,8 +507,7 @@ class GenerateKeyRequest(GenerateRequestBase):
{}
) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class GenerateKeyResponse(GenerateKeyRequest):
@ -443,7 +517,8 @@ class GenerateKeyResponse(GenerateKeyRequest):
user_id: Optional[str] = None
token_id: Optional[str] = None
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def set_model_info(cls, values):
if values.get("token") is not None:
values.update({"key": values.get("token")})
@ -483,14 +558,22 @@ class LiteLLM_ModelTable(LiteLLMBase):
created_by: str
updated_by: str
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class NewUserRequest(GenerateKeyRequest):
max_budget: Optional[float] = None
user_email: Optional[str] = None
user_role: Optional[str] = None
user_role: Optional[
Literal[
LitellmUserRoles.PROXY_ADMIN,
LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY,
LitellmUserRoles.INTERNAL_USER,
LitellmUserRoles.INTERNAL_USER_VIEW_ONLY,
LitellmUserRoles.TEAM,
LitellmUserRoles.CUSTOMER,
]
] = None
teams: Optional[list] = None
organization_id: Optional[str] = None
auto_create_key: bool = (
@ -509,10 +592,20 @@ class UpdateUserRequest(GenerateRequestBase):
user_email: Optional[str] = None
spend: Optional[float] = None
metadata: Optional[dict] = None
user_role: Optional[str] = None
user_role: Optional[
Literal[
LitellmUserRoles.PROXY_ADMIN,
LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY,
LitellmUserRoles.INTERNAL_USER,
LitellmUserRoles.INTERNAL_USER_VIEW_ONLY,
LitellmUserRoles.TEAM,
LitellmUserRoles.CUSTOMER,
]
] = None
max_budget: Optional[float] = None
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def check_user_info(cls, values):
if values.get("user_id") is None and values.get("user_email") is None:
raise ValueError("Either user id or user email must be provided")
@ -536,7 +629,8 @@ class NewCustomerRequest(LiteLLMBase):
None # if no equivalent model in allowed region - default all requests to this model
)
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def check_user_info(cls, values):
if values.get("max_budget") is not None and values.get("budget_id") is not None:
raise ValueError("Set either 'max_budget' or 'budget_id', not both.")
@ -576,7 +670,8 @@ class Member(LiteLLMBase):
user_id: Optional[str] = None
user_email: Optional[str] = None
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def check_user_info(cls, values):
if values.get("user_id") is None and values.get("user_email") is None:
raise ValueError("Either user id or user email must be provided")
@ -605,8 +700,7 @@ class TeamBase(LiteLLMBase):
class NewTeamRequest(TeamBase):
model_aliases: Optional[dict] = None
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class GlobalEndUsersSpend(LiteLLMBase):
@ -626,7 +720,8 @@ class TeamMemberDeleteRequest(LiteLLMBase):
user_id: Optional[str] = None
user_email: Optional[str] = None
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def check_user_info(cls, values):
if values.get("user_id") is None and values.get("user_email") is None:
raise ValueError("Either user id or user email must be provided")
@ -692,10 +787,10 @@ class LiteLLM_TeamTable(TeamBase):
budget_reset_at: Optional[datetime] = None
model_id: Optional[int] = None
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def set_model_info(cls, values):
dict_fields = [
"metadata",
@ -731,8 +826,7 @@ class LiteLLM_BudgetTable(LiteLLMBase):
model_max_budget: Optional[dict] = None
budget_duration: Optional[str] = None
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class LiteLLM_TeamMemberTable(LiteLLM_BudgetTable):
@ -745,8 +839,7 @@ class LiteLLM_TeamMemberTable(LiteLLM_BudgetTable):
team_id: Optional[str] = None
budget_id: Optional[str] = None
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class NewOrganizationRequest(LiteLLM_BudgetTable):
@ -825,8 +918,7 @@ class KeyManagementSettings(LiteLLMBase):
class TeamDefaultSettings(LiteLLMBase):
team_id: str
class Config:
extra = "allow" # allow params not defined here, these fall in litellm.completion(**kwargs)
model_config = ConfigDict(extra="allow") # allow params not defined here, these fall in litellm.completion(**kwargs)
class DynamoDBArgs(LiteLLMBase):
@ -988,8 +1080,7 @@ class ConfigYAML(LiteLLMBase):
description="litellm router object settings. See router.py __init__ for all, example router.num_retries=5, router.timeout=5, router.max_retries=5, router.retry_after=5",
)
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class LiteLLM_VerificationToken(LiteLLMBase):
@ -1019,9 +1110,7 @@ class LiteLLM_VerificationToken(LiteLLMBase):
org_id: Optional[str] = None # org id for a given key
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken):
"""
@ -1043,6 +1132,7 @@ class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken):
end_user_id: Optional[str] = None
end_user_tpm_limit: Optional[int] = None
end_user_rpm_limit: Optional[int] = None
end_user_max_budget: Optional[float] = None
class UserAPIKeyAuth(
@ -1053,10 +1143,20 @@ class UserAPIKeyAuth(
"""
api_key: Optional[str] = None
user_role: Optional[Literal["proxy_admin", "app_owner", "app_user"]] = None
user_role: Optional[
Literal[
LitellmUserRoles.PROXY_ADMIN,
LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY,
LitellmUserRoles.INTERNAL_USER,
LitellmUserRoles.INTERNAL_USER_VIEW_ONLY,
LitellmUserRoles.TEAM,
LitellmUserRoles.CUSTOMER,
]
] = None
allowed_model_region: Optional[Literal["eu"]] = None
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def check_api_key(cls, values):
if values.get("api_key") is not None:
values.update({"token": hash_token(values.get("api_key"))})
@ -1083,7 +1183,8 @@ class LiteLLM_UserTable(LiteLLMBase):
tpm_limit: Optional[int] = None
rpm_limit: Optional[int] = None
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def set_model_info(cls, values):
if values.get("spend") is None:
values.update({"spend": 0.0})
@ -1091,8 +1192,7 @@ class LiteLLM_UserTable(LiteLLMBase):
values.update({"models": []})
return values
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class LiteLLM_EndUserTable(LiteLLMBase):
@ -1104,14 +1204,14 @@ class LiteLLM_EndUserTable(LiteLLMBase):
default_model: Optional[str] = None
litellm_budget_table: Optional[LiteLLM_BudgetTable] = None
@root_validator(pre=True)
@model_validator(mode="before")
@classmethod
def set_model_info(cls, values):
if values.get("spend") is None:
values.update({"spend": 0.0})
return values
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class LiteLLM_SpendLogs(LiteLLMBase):
@ -1170,6 +1270,7 @@ class CallInfo(LiteLLMBase):
spend: float
max_budget: Optional[float] = None
token: str = Field(description="Hashed value of that key")
customer_id: Optional[str] = None
user_id: Optional[str] = None
team_id: Optional[str] = None
user_email: Optional[str] = None
@ -1180,9 +1281,13 @@ class CallInfo(LiteLLMBase):
class WebhookEvent(CallInfo):
event: Literal[
"budget_crossed", "threshold_crossed", "projected_limit_exceeded", "key_created"
"budget_crossed",
"threshold_crossed",
"projected_limit_exceeded",
"key_created",
"spend_tracked",
]
event_group: Literal["user", "key", "team", "proxy"]
event_group: Literal["internal_user", "key", "team", "proxy", "customer"]
event_message: str # human-readable description of event
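A sketch of the `spend_tracked` / `customer` combination these new literals allow; field values are placeholders, and the remaining `CallInfo` fields are assumed to stay optional:

```python
from litellm.proxy._types import WebhookEvent

# Hypothetical payload exercising the newly added literals; values are placeholders.
event = WebhookEvent(
    event="spend_tracked",
    event_group="customer",
    event_message="Customer spend tracked",
    spend=0.42,
    token="hashed-key-value",
    customer_id="cust-123",
)
print(event.model_dump_json())
```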
@ -1215,6 +1320,7 @@ class InvitationModel(LiteLLMBase):
updated_at: datetime
updated_by: str
class ConfigFieldInfo(LiteLLMBase):
field_name: str
field_value: Any

View file

@ -15,6 +15,7 @@ from litellm.proxy._types import (
LiteLLM_TeamTable,
LiteLLMRoutes,
LiteLLM_OrganizationTable,
LitellmUserRoles,
)
from typing import Optional, Literal, Union
from litellm.proxy.utils import PrismaClient
@ -133,7 +134,11 @@ def _allowed_routes_check(user_route: str, allowed_routes: list) -> bool:
def allowed_routes_check(
user_role: Literal["proxy_admin", "team", "user"],
user_role: Literal[
LitellmUserRoles.PROXY_ADMIN,
LitellmUserRoles.TEAM,
LitellmUserRoles.INTERNAL_USER,
],
user_route: str,
litellm_proxy_roles: LiteLLM_JWTAuth,
) -> bool:
@ -141,14 +146,14 @@ def allowed_routes_check(
Check if user -> not admin - allowed to access these routes
"""
if user_role == "proxy_admin":
if user_role == LitellmUserRoles.PROXY_ADMIN:
is_allowed = _allowed_routes_check(
user_route=user_route,
allowed_routes=litellm_proxy_roles.admin_allowed_routes,
)
return is_allowed
elif user_role == "team":
elif user_role == LitellmUserRoles.TEAM:
if litellm_proxy_roles.team_allowed_routes is None:
"""
By default allow a team to call openai + info routes
@ -193,13 +198,27 @@ async def get_end_user_object(
if end_user_id is None:
return None
_key = "end_user_id:{}".format(end_user_id)
def check_in_budget(end_user_obj: LiteLLM_EndUserTable):
if end_user_obj.litellm_budget_table is None:
return
end_user_budget = end_user_obj.litellm_budget_table.max_budget
if end_user_budget is not None and end_user_obj.spend > end_user_budget:
raise litellm.BudgetExceededError(
current_cost=end_user_obj.spend, max_budget=end_user_budget
)
# check if in cache
cached_user_obj = await user_api_key_cache.async_get_cache(key=_key)
if cached_user_obj is not None:
if isinstance(cached_user_obj, dict):
return LiteLLM_EndUserTable(**cached_user_obj)
return_obj = LiteLLM_EndUserTable(**cached_user_obj)
check_in_budget(end_user_obj=return_obj)
return return_obj
elif isinstance(cached_user_obj, LiteLLM_EndUserTable):
return cached_user_obj
return_obj = cached_user_obj
check_in_budget(end_user_obj=return_obj)
return return_obj
# else, check db
try:
response = await prisma_client.db.litellm_endusertable.find_unique(
@ -217,8 +236,12 @@ async def get_end_user_object(
_response = LiteLLM_EndUserTable(**response.dict())
check_in_budget(end_user_obj=_response)
return _response
except Exception as e: # if end-user not in db
if isinstance(e, litellm.BudgetExceededError):
raise e
return None
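Restating the `check_in_budget` guard as a standalone snippet, with illustrative numbers:

```python
import litellm

# Standalone restatement of the check_in_budget guard above (illustrative values).
spend, end_user_budget = 12.5, 10.0
if end_user_budget is not None and spend > end_user_budget:
    raise litellm.BudgetExceededError(current_cost=spend, max_budget=end_user_budget)
```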

View file

@ -429,6 +429,19 @@ def run_server(
proxy_config = ProxyConfig()
_config = asyncio.run(proxy_config.get_config(config_file_path=config))
### LITELLM SETTINGS ###
litellm_settings = _config.get("litellm_settings", None)
if (
litellm_settings is not None
and "json_logs" in litellm_settings
and litellm_settings["json_logs"] == True
):
import litellm
litellm.json_logs = True
litellm._turn_on_json()
### GENERAL SETTINGS ###
general_settings = _config.get("general_settings", {})
if general_settings is None:
general_settings = {}

File diff suppressed because it is too large Load diff

View file

@ -15,6 +15,7 @@ from litellm.proxy._types import (
WebhookEvent,
AlertType,
ResetTeamBudgetRequest,
LitellmUserRoles,
)
from litellm.caching import DualCache, RedisCache
from litellm.router import Deployment, ModelInfo, LiteLLM_Params
@ -2637,7 +2638,7 @@ def _is_user_proxy_admin(user_id_information: Optional[list]):
_user = user_id_information[0]
if (
_user.get("user_role", None) is not None
and _user.get("user_role") == "proxy_admin"
and _user.get("user_role") == LitellmUserRoles.PROXY_ADMIN.value
):
return True
@ -2650,7 +2651,7 @@ def _is_user_proxy_admin(user_id_information: Optional[list]):
if (
_user.get("user_role", None) is not None
and _user.get("user_role") == "proxy_admin"
and _user.get("user_role") == LitellmUserRoles.PROXY_ADMIN.value
):
return True

View file

@ -103,7 +103,9 @@ class Router:
allowed_fails: Optional[
int
] = None, # Number of times a deployment can fail before being added to cooldown
cooldown_time: float = 1, # (seconds) time to cooldown a deployment after failure
cooldown_time: Optional[
float
] = None, # (seconds) time to cooldown a deployment after failure
routing_strategy: Literal[
"simple-shuffle",
"least-busy",
@ -248,7 +250,7 @@ class Router:
) # initialize an empty list - to allow _add_deployment and delete_deployment to work
self.allowed_fails = allowed_fails or litellm.allowed_fails
self.cooldown_time = cooldown_time or 1
self.cooldown_time = cooldown_time or 60
self.failed_calls = (
InMemoryCache()
) # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown
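Since the fallback cooldown now lands at 60 seconds, callers that depended on the old 1-second behaviour can pin it explicitly; a sketch with a placeholder deployment:

```python
from litellm import Router

# Hypothetical router config pinning the cooldown window (placeholder deployment).
router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "os.environ/OPENAI_API_KEY"},
        }
    ],
    cooldown_time=1,  # seconds; left unset, this now falls back to 60
)
```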
@ -356,7 +358,8 @@ class Router:
raise ValueError(f"Item '{fallback_dict}' is not a dictionary.")
if len(fallback_dict) != 1:
raise ValueError(
f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys.")
f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys."
)
def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
if routing_strategy == "least-busy":
@ -662,12 +665,40 @@ class Router:
raise e
async def abatch_completion(
self, models: List[str], messages: List[Dict[str, str]], **kwargs
self,
models: List[str],
messages: Union[List[Dict[str, str]], List[List[Dict[str, str]]]],
**kwargs,
):
"""
Async Batch Completion - Batch Process 1 request to multiple model_group on litellm.Router
Use this for sending the same request to N models
Async Batch Completion. Used for 2 scenarios:
1. Batch Process 1 request to N models on litellm.Router. Pass messages as List[Dict[str, str]] to use this
2. Batch Process N requests to M models on litellm.Router. Pass messages as List[List[Dict[str, str]]] to use this
Example Request for 1 request to N models:
```
response = await router.abatch_completion(
models=["gpt-3.5-turbo", "groq-llama"],
messages=[
{"role": "user", "content": "is litellm becoming a better product ?"}
],
max_tokens=15,
)
```
Example Request for N requests to M models:
```
response = await router.abatch_completion(
models=["gpt-3.5-turbo", "groq-llama"],
messages=[
[{"role": "user", "content": "is litellm becoming a better product ?"}],
[{"role": "user", "content": "who is this"}],
],
)
```
"""
############## Helpers for async completion ##################
async def _async_completion_no_exceptions(
model: str, messages: List[Dict[str, str]], **kwargs
@ -680,17 +711,50 @@ class Router:
except Exception as e:
return e
_tasks = []
for model in models:
# add each task; exceptions are caught and returned as results
_tasks.append(
_async_completion_no_exceptions(
model=model, messages=messages, **kwargs
async def _async_completion_no_exceptions_return_idx(
model: str,
messages: List[Dict[str, str]],
idx: int, # index of message this response corresponds to
**kwargs,
):
"""
Wrapper around self.acompletion that catches exceptions and returns them as a result
"""
try:
return (
await self.acompletion(model=model, messages=messages, **kwargs),
idx,
)
)
except Exception as e:
return e, idx
response = await asyncio.gather(*_tasks)
return response
############## Helpers for async completion ##################
if isinstance(messages, list) and all(isinstance(m, dict) for m in messages):
_tasks = []
for model in models:
# add each task; exceptions are caught and returned as results
_tasks.append(_async_completion_no_exceptions(model=model, messages=messages, **kwargs)) # type: ignore
response = await asyncio.gather(*_tasks)
return response
elif isinstance(messages, list) and all(isinstance(m, list) for m in messages):
_tasks = []
for idx, message in enumerate(messages):
for model in models:
# Request Number X, Model Number Y
_tasks.append(
_async_completion_no_exceptions_return_idx(
model=model, idx=idx, messages=message, **kwargs # type: ignore
)
)
responses = await asyncio.gather(*_tasks)
final_responses: List[List[Any]] = [[] for _ in range(len(messages))]
for response in responses:
if isinstance(response, tuple):
final_responses[response[1]].append(response[0])
else:
final_responses[0].append(response)
return final_responses
async def abatch_completion_one_model_multiple_requests(
self, model: str, messages: List[List[Dict[str, str]]], **kwargs
@ -737,6 +801,101 @@ class Router:
response = await asyncio.gather(*_tasks)
return response
# fmt: off
@overload
async def abatch_completion_fastest_response(
self, model: str, messages: List[Dict[str, str]], stream: Literal[True], **kwargs
) -> CustomStreamWrapper:
...
@overload
async def abatch_completion_fastest_response(
self, model: str, messages: List[Dict[str, str]], stream: Literal[False] = False, **kwargs
) -> ModelResponse:
...
# fmt: on
async def abatch_completion_fastest_response(
self,
model: str,
messages: List[Dict[str, str]],
stream: bool = False,
**kwargs,
):
"""
model - comma-separated string of model names, e.g. model="gpt-4, gpt-3.5-turbo"
Returns fastest response from list of model names. OpenAI-compatible endpoint.
"""
models = [m.strip() for m in model.split(",")]
async def _async_completion_no_exceptions(
model: str, messages: List[Dict[str, str]], stream: bool, **kwargs: Any
) -> Union[ModelResponse, CustomStreamWrapper, Exception]:
"""
Wrapper around self.acompletion that catches exceptions and returns them as a result
"""
try:
return await self.acompletion(model=model, messages=messages, stream=stream, **kwargs) # type: ignore
except asyncio.CancelledError:
verbose_router_logger.debug(
"Received 'task.cancel'. Cancelling call w/ model={}.".format(model)
)
raise
except Exception as e:
return e
pending_tasks = [] # type: ignore
async def check_response(task: asyncio.Task):
nonlocal pending_tasks
try:
result = await task
if isinstance(result, (ModelResponse, CustomStreamWrapper)):
verbose_router_logger.debug(
"Received successful response. Cancelling other LLM API calls."
)
# If a desired response is received, cancel all other pending tasks
for t in pending_tasks:
t.cancel()
return result
except Exception:
# Ignore exceptions, let the loop handle them
pass
finally:
# Remove the task from pending tasks if it finishes
try:
pending_tasks.remove(task)
except KeyError:
pass
for model in models:
task = asyncio.create_task(
_async_completion_no_exceptions(
model=model, messages=messages, stream=stream, **kwargs
)
)
pending_tasks.append(task)
# Await the first task to complete successfully
while pending_tasks:
done, pending_tasks = await asyncio.wait( # type: ignore
pending_tasks, return_when=asyncio.FIRST_COMPLETED
)
for completed_task in done:
result = await check_response(completed_task)
if result is not None:
# Return the first successful result
result._hidden_params["fastest_response_batch_completion"] = True
return result
# If we exit the loop without returning, all tasks failed
raise Exception("All tasks failed")
def image_generation(self, prompt: str, model: str, **kwargs):
try:
kwargs["model"] = model
@ -1045,6 +1204,84 @@ class Router:
self.fail_calls[model_name] += 1
raise e
async def aspeech(self, model: str, input: str, voice: str, **kwargs):
"""
Example Usage:
```
from litellm import Router
client = Router(model_list = [
{
"model_name": "tts",
"litellm_params": {
"model": "tts-1",
},
},
])
response = await client.aspeech(
model="tts",
voice="alloy",
input="the quick brown fox jumped over the lazy dogs",
api_base=None,
api_key=None,
organization=None,
project=None,
max_retries=1,
timeout=600,
client=None,
optional_params={},
)
response.stream_to_file(speech_file_path)
```
"""
try:
kwargs["input"] = input
kwargs["voice"] = voice
deployment = await self.async_get_available_deployment(
model=model,
messages=[{"role": "user", "content": "prompt"}],
specific_deployment=kwargs.pop("specific_deployment", None),
)
kwargs.setdefault("metadata", {}).update(
{
"deployment": deployment["litellm_params"]["model"],
"model_info": deployment.get("model_info", {}),
}
)
kwargs["model_info"] = deployment.get("model_info", {})
data = deployment["litellm_params"].copy()
model_name = data["model"]
for k, v in self.default_litellm_params.items():
if (
k not in kwargs
): # prioritize model-specific params > default router params
kwargs[k] = v
elif k == "metadata":
kwargs[k].update(v)
potential_model_client = self._get_client(
deployment=deployment, kwargs=kwargs, client_type="async"
)
# check if provided keys == client keys #
dynamic_api_key = kwargs.get("api_key", None)
if (
dynamic_api_key is not None
and potential_model_client is not None
and dynamic_api_key != potential_model_client.api_key
):
model_client = None
else:
model_client = potential_model_client
response = await litellm.aspeech(**data, **kwargs)
return response
except Exception as e:
raise e
async def amoderation(self, model: str, input: str, **kwargs):
try:
kwargs["model"] = model
@ -1693,7 +1930,8 @@ class Router:
)
await asyncio.sleep(_timeout)
try:
original_exception.message += f"\nNumber Retries = {current_attempt}"
cooldown_deployments = await self._async_get_cooldown_deployments()
original_exception.message += f"\nNumber Retries = {current_attempt + 1}, Max Retries={num_retries}\nCooldown Deployments={cooldown_deployments}"
except:
pass
raise original_exception
@ -1986,7 +2224,7 @@ class Router:
)
)
if _time_to_cooldown < 0:
if _time_to_cooldown is None or _time_to_cooldown < 0:
# if the response headers did not read it -> set to default cooldown time
_time_to_cooldown = self.cooldown_time
@ -2082,6 +2320,9 @@ class Router:
elif exception_status == 408:
return True
elif exception_status == 404:
return True
else:
# Do NOT cool down all other 4XX Errors
return False
@ -2107,6 +2348,7 @@ class Router:
the exception is not one that should be immediately retried (e.g. 401)
"""
args = locals()
if deployment is None:
return
@ -2139,7 +2381,6 @@ class Router:
)
exception_status = 500
_should_retry = litellm._should_retry(status_code=exception_status)
if updated_fails > self.allowed_fails or _should_retry == False:
# get the current cooldown list for that minute
cooldown_key = f"{current_minute}:cooldown_models" # group cooldown models by minute to reduce number of redis calls
@ -2453,8 +2694,17 @@ class Router:
if "azure" in model_name:
if api_base is None or not isinstance(api_base, str):
filtered_litellm_params = {
k: v
for k, v in model["litellm_params"].items()
if k != "api_key"
}
_filtered_model = {
"model_name": model["model_name"],
"litellm_params": filtered_litellm_params,
}
raise ValueError(
f"api_base is required for Azure OpenAI. Set it on your config. Model - {model}"
f"api_base is required for Azure OpenAI. Set it on your config. Model - {_filtered_model}"
)
azure_ad_token = litellm_params.get("azure_ad_token")
if azure_ad_token is not None:
@ -3076,6 +3326,8 @@ class Router:
supported_openai_params = litellm.get_supported_openai_params(
model=model, custom_llm_provider=llm_provider
)
if supported_openai_params is None:
supported_openai_params = []
model_info = ModelMapInfo(
max_tokens=None,
max_input_tokens=None,
@ -3546,7 +3798,6 @@ class Router:
## get healthy deployments
### get all deployments
healthy_deployments = [m for m in self.model_list if m["model_name"] == model]
if len(healthy_deployments) == 0:
# check if the user sent in a deployment name instead
healthy_deployments = [

View file

@ -0,0 +1,2 @@
{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-3.5-turbo-0125", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 10}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-3.5-turbo-0125", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 10}}

View file

@ -14,7 +14,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
import pytest, logging, asyncio
import litellm, asyncio
from litellm.proxy.proxy_server import add_new_model, update_model
from litellm.proxy.proxy_server import add_new_model, update_model, LitellmUserRoles
from litellm._logging import verbose_proxy_logger
from litellm.proxy.utils import PrismaClient, ProxyLogging
@ -90,7 +90,9 @@ async def test_add_new_model(prisma_client):
),
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN.value,
api_key="sk-1234",
user_id="1234",
),
)
@ -137,7 +139,9 @@ async def test_add_update_model(prisma_client):
),
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN.value,
api_key="sk-1234",
user_id="1234",
),
)
@ -166,7 +170,9 @@ async def test_add_update_model(prisma_client):
),
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN.value,
api_key="sk-1234",
user_id="1234",
),
)

View file

@ -499,6 +499,36 @@ async def test_webhook_alerting(alerting_type):
mock_send_alert.assert_awaited_once()
# @pytest.mark.asyncio
# async def test_webhook_customer_spend_event():
# """
# Test if customer spend is working as expected
# """
# slack_alerting = SlackAlerting(alerting=["webhook"])
# with patch.object(
# slack_alerting, "send_webhook_alert", new=AsyncMock()
# ) as mock_send_alert:
# user_info = {
# "token": "50e55ca5bfbd0759697538e8d23c0cd5031f52d9e19e176d7233b20c7c4d3403",
# "spend": 1,
# "max_budget": 0,
# "user_id": "ishaan@berri.ai",
# "user_email": "ishaan@berri.ai",
# "key_alias": "my-test-key",
# "projected_exceeded_date": "10/20/2024",
# "projected_spend": 200,
# }
# user_info = CallInfo(**user_info)
# for _ in range(50):
# await slack_alerting.budget_alerts(
# type=alerting_type,
# user_info=user_info,
# )
# mock_send_alert.assert_awaited_once()
@pytest.mark.parametrize(
"model, api_base, llm_provider, vertex_project, vertex_location",
[

View file

@ -0,0 +1,96 @@
# What is this?
## unit tests for openai tts endpoint
import sys, os, asyncio, time, random, uuid
import traceback
from dotenv import load_dotenv
load_dotenv()
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm, openai
from pathlib import Path
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_audio_speech_litellm(sync_mode):
speech_file_path = Path(__file__).parent / "speech.mp3"
if sync_mode:
response = litellm.speech(
model="openai/tts-1",
voice="alloy",
input="the quick brown fox jumped over the lazy dogs",
api_base=None,
api_key=None,
organization=None,
project=None,
max_retries=1,
timeout=600,
client=None,
optional_params={},
)
from litellm.llms.openai import HttpxBinaryResponseContent
assert isinstance(response, HttpxBinaryResponseContent)
else:
response = await litellm.aspeech(
model="openai/tts-1",
voice="alloy",
input="the quick brown fox jumped over the lazy dogs",
api_base=None,
api_key=None,
organization=None,
project=None,
max_retries=1,
timeout=600,
client=None,
optional_params={},
)
from litellm.llms.openai import HttpxBinaryResponseContent
assert isinstance(response, HttpxBinaryResponseContent)
@pytest.mark.parametrize("mode", ["iterator"]) # "file",
@pytest.mark.asyncio
async def test_audio_speech_router(mode):
speech_file_path = Path(__file__).parent / "speech.mp3"
from litellm import Router
client = Router(
model_list=[
{
"model_name": "tts",
"litellm_params": {
"model": "openai/tts-1",
},
},
]
)
response = await client.aspeech(
model="tts",
voice="alloy",
input="the quick brown fox jumped over the lazy dogs",
api_base=None,
api_key=None,
organization=None,
project=None,
max_retries=1,
timeout=600,
client=None,
optional_params={},
)
from litellm.llms.openai import HttpxBinaryResponseContent
assert isinstance(response, HttpxBinaryResponseContent)

View file

@ -0,0 +1,62 @@
# What is this?
## Tests if 'get_end_user_object' works as expected
import sys, os, asyncio, time, random, uuid
import traceback
from dotenv import load_dotenv
load_dotenv()
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest, litellm
from litellm.proxy.auth.auth_checks import get_end_user_object
from litellm.caching import DualCache
from litellm.proxy._types import LiteLLM_EndUserTable, LiteLLM_BudgetTable
from litellm.proxy.utils import PrismaClient
@pytest.mark.parametrize("customer_spend, customer_budget", [(0, 10), (10, 0)])
@pytest.mark.asyncio
async def test_get_end_user_object(customer_spend, customer_budget):
"""
Scenario 1: normal
Scenario 2: user over budget
"""
end_user_id = "my-test-customer"
_budget = LiteLLM_BudgetTable(max_budget=customer_budget)
end_user_obj = LiteLLM_EndUserTable(
user_id=end_user_id,
spend=customer_spend,
litellm_budget_table=_budget,
blocked=False,
)
_cache = DualCache()
_key = "end_user_id:{}".format(end_user_id)
_cache.set_cache(key=_key, value=end_user_obj)
try:
await get_end_user_object(
end_user_id=end_user_id,
prisma_client="RANDOM VALUE", # type: ignore
user_api_key_cache=_cache,
)
if customer_spend > customer_budget:
pytest.fail(
"Expected call to fail. Customer Spend={}, Customer Budget={}".format(
customer_spend, customer_budget
)
)
except Exception as e:
if (
isinstance(e, litellm.BudgetExceededError)
and customer_spend > customer_budget
):
pass
else:
pytest.fail(
"Expected call to work. Customer Spend={}, Customer Budget={}, Error={}".format(
customer_spend, customer_budget, str(e)
)
)

View file

@ -7,7 +7,7 @@ import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent-directory to the system path
) # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion, completion_cost, Timeout
@ -38,7 +38,7 @@ def reset_callbacks():
@pytest.mark.skip(reason="Local test")
def test_response_model_none():
"""
Addresses: https://github.com/BerriAI/litellm/issues/2972
Addresses:https://github.com/BerriAI/litellm/issues/2972
"""
x = completion(
model="mymodel",
@ -1397,6 +1397,81 @@ def test_hf_classifier_task():
pytest.fail(f"Error occurred: {str(e)}")
def test_ollama_image():
"""
Test that datauri prefixes are removed, JPEG/PNG images are passed
through, and other image formats are converted to JPEG. Non-image
data is untouched.
"""
import io, base64
from PIL import Image
def mock_post(url, **kwargs):
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.headers = {"Content-Type": "application/json"}
mock_response.json.return_value = {
# return the image in the response so that it can be tested
# against the original
"response": kwargs["json"]["images"]
}
return mock_response
def make_b64image(format):
image = Image.new(mode="RGB", size=(1, 1))
image_buffer = io.BytesIO()
image.save(image_buffer, format)
return base64.b64encode(image_buffer.getvalue()).decode("utf-8")
jpeg_image = make_b64image("JPEG")
webp_image = make_b64image("WEBP")
png_image = make_b64image("PNG")
base64_data = base64.b64encode(b"some random data").decode("utf-8")
datauri_base64_data = f"data:text/plain;base64,{base64_data}"
tests = [
# input expected
[jpeg_image, jpeg_image],
[webp_image, None],
[png_image, png_image],
[f"data:image/jpeg;base64,{jpeg_image}", jpeg_image],
[f"data:image/webp;base64,{webp_image}", None],
[f"data:image/png;base64,{png_image}", png_image],
[datauri_base64_data, datauri_base64_data],
]
for test in tests:
try:
with patch("requests.post", side_effect=mock_post):
response = completion(
model="ollama/llava",
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "Whats in this image?"},
{
"type": "image_url",
"image_url": {"url": test[0]},
},
],
}
],
)
if not test[1]:
# the conversion process may not always generate the same image,
# so just check for a JPEG image when a conversion was done.
image_data = response["choices"][0]["message"]["content"][0]
image = Image.open(io.BytesIO(base64.b64decode(image_data)))
assert image.format == "JPEG"
else:
assert response["choices"][0]["message"]["content"][0] == test[1]
except Exception as e:
pytest.fail(f"Error occurred: {e}")
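Not litellm's implementation — a standalone sketch of the normalization behavior this test asserts, using a hypothetical `normalize_image_payload` helper:

```python
import base64
import io

from PIL import Image


def normalize_image_payload(value: str) -> str:
    """Strip image data-URI prefixes, pass JPEG/PNG through, convert other
    image formats to JPEG, and leave non-image data untouched."""
    if value.startswith("data:"):
        if not value.startswith("data:image/"):
            return value  # non-image data URI -> untouched
        value = value.split(",", 1)[1]  # drop the data-URI prefix
    try:
        image = Image.open(io.BytesIO(base64.b64decode(value)))
    except Exception:
        return value  # not decodable as an image -> untouched
    if image.format in ("JPEG", "PNG"):
        return value
    buffer = io.BytesIO()
    image.convert("RGB").save(buffer, "JPEG")  # re-encode e.g. WEBP as JPEG
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
```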
########################### End of Hugging Face Tests ##############################################
# def test_completion_hf_api():
# # failing on circle-ci commenting out

View file

@ -13,7 +13,7 @@ sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest, litellm
from pydantic import BaseModel
from pydantic import BaseModel, ConfigDict
from litellm.proxy.proxy_server import ProxyConfig
from litellm.proxy.utils import encrypt_value, ProxyLogging, DualCache
from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo
@ -26,8 +26,7 @@ class DBModel(BaseModel):
model_info: dict
litellm_params: dict
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
@pytest.mark.asyncio

View file

@ -61,6 +61,7 @@ from litellm.proxy.proxy_server import (
audio_transcriptions,
moderations,
model_list,
LitellmUserRoles,
)
from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend
from litellm._logging import verbose_proxy_logger
@ -137,7 +138,9 @@ async def test_new_user_response(prisma_client):
team_id=_team_id,
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234",
user_id="1234",
),
)
@ -206,7 +209,7 @@ def test_generate_and_call_with_valid_key(prisma_client, api_route):
await litellm.proxy.proxy_server.prisma_client.connect()
from litellm.proxy.proxy_server import user_api_key_cache
request = NewUserRequest(user_role="app_owner")
request = NewUserRequest(user_role=LitellmUserRoles.INTERNAL_USER)
key = await new_user(request)
print(key)
user_id = key.user_id
@ -215,7 +218,7 @@ def test_generate_and_call_with_valid_key(prisma_client, api_route):
new_user_info = await user_info(user_id=user_id)
new_user_info = new_user_info["user_info"]
print("new_user_info=", new_user_info)
assert new_user_info.user_role == "app_owner"
assert new_user_info.user_role == LitellmUserRoles.INTERNAL_USER
assert new_user_info.user_id == user_id
generated_key = key.key
@ -363,7 +366,8 @@ async def test_call_with_valid_model_using_all_models(prisma_client):
)
new_team_response = await new_team(
data=team_request, user_api_key_dict=UserAPIKeyAuth(user_role="proxy_admin")
data=team_request,
user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
)
print("new_team_response", new_team_response)
created_team_id = new_team_response["team_id"]
@ -559,7 +563,7 @@ def test_call_with_end_user_over_budget(prisma_client):
asyncio.run(test())
except Exception as e:
error_detail = e.message
assert "Authentication Error, ExceededBudget:" in error_detail
assert "Budget has been exceeded! Current" in error_detail
print(vars(e))
@ -922,7 +926,7 @@ def test_delete_key(prisma_client):
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token)
print(f"result: {result}")
result.user_role = "proxy_admin"
result.user_role = LitellmUserRoles.PROXY_ADMIN
# delete the key
result_delete_key = await delete_key_fn(
data=delete_key_request, user_api_key_dict=result
@ -972,7 +976,7 @@ def test_delete_key_auth(prisma_client):
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token)
print(f"result: {result}")
result.user_role = "proxy_admin"
result.user_role = LitellmUserRoles.PROXY_ADMIN
result_delete_key = await delete_key_fn(
data=delete_key_request, user_api_key_dict=result
@ -1044,7 +1048,7 @@ def test_generate_and_call_key_info(prisma_client):
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token)
print(f"result: {result}")
result.user_role = "proxy_admin"
result.user_role = LitellmUserRoles.PROXY_ADMIN
result_delete_key = await delete_key_fn(
data=delete_key_request, user_api_key_dict=result
@ -1078,7 +1082,9 @@ def test_generate_and_update_key(prisma_client):
team_id=_team_1,
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234",
user_id="1234",
),
)
@ -1088,7 +1094,9 @@ def test_generate_and_update_key(prisma_client):
team_id=_team_2,
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234",
user_id="1234",
),
)
@ -1158,7 +1166,7 @@ def test_generate_and_update_key(prisma_client):
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token)
print(f"result: {result}")
result.user_role = "proxy_admin"
result.user_role = LitellmUserRoles.PROXY_ADMIN
result_delete_key = await delete_key_fn(
data=delete_key_request, user_api_key_dict=result
@ -2038,7 +2046,9 @@ async def test_master_key_hashing(prisma_client):
await new_team(
NewTeamRequest(team_id=_team_id),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234",
user_id="1234",
),
)
@ -2076,7 +2086,7 @@ async def test_reset_spend_authentication(prisma_client):
"""
1. Test master key can access this route -> ONLY MASTER KEY SHOULD BE ABLE TO RESET SPEND
2. Test that non-master key gets rejected
3. Test that non-master key with role == "proxy_admin" or admin gets rejected
3. Test that non-master key with role == LitellmUserRoles.PROXY_ADMIN or admin gets rejected
"""
print("prisma client=", prisma_client)
@ -2121,10 +2131,10 @@ async def test_reset_spend_authentication(prisma_client):
in e.message
)
# Test 3 - Non-Master Key with role == "proxy_admin" or admin
# Test 3 - Non-Master Key with role == LitellmUserRoles.PROXY_ADMIN or admin
_response = await new_user(
data=NewUserRequest(
user_role="proxy_admin",
user_role=LitellmUserRoles.PROXY_ADMIN,
tpm_limit=20,
)
)
@ -2174,7 +2184,9 @@ async def test_create_update_team(prisma_client):
rpm_limit=20,
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234",
user_id="1234",
),
)
@ -2200,7 +2212,9 @@ async def test_create_update_team(prisma_client):
rpm_limit=30,
),
user_api_key_dict=UserAPIKeyAuth(
user_role="proxy_admin", api_key="sk-1234", user_id="1234"
user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234",
user_id="1234",
),
)

View file

@ -0,0 +1,161 @@
# What is this?
## Unit Tests for OpenAI Batches API
import sys, os, json
import traceback
import asyncio
from dotenv import load_dotenv
load_dotenv()
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest, logging, asyncio
import litellm
from litellm import (
create_batch,
create_file,
)
import time
def test_create_batch():
"""
1. Create File for Batch completion
2. Create Batch Request
3. Retrieve the specific batch
"""
file_name = "openai_batch_completions.jsonl"
_current_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(_current_dir, file_name)
file_obj = litellm.create_file(
file=open(file_path, "rb"),
purpose="batch",
custom_llm_provider="openai",
)
print("Response from creating file=", file_obj)
batch_input_file_id = file_obj.id
assert (
batch_input_file_id is not None
), "Failed to create file, expected a non null file_id but got {batch_input_file_id}"
create_batch_response = litellm.create_batch(
completion_window="24h",
endpoint="/v1/chat/completions",
input_file_id=batch_input_file_id,
custom_llm_provider="openai",
metadata={"key1": "value1", "key2": "value2"},
)
print("response from litellm.create_batch=", create_batch_response)
assert (
create_batch_response.id is not None
), f"Failed to create batch, expected a non null batch_id but got {create_batch_response.id}"
assert (
create_batch_response.endpoint == "/v1/chat/completions"
), f"Failed to create batch, expected endpoint to be /v1/chat/completions but got {create_batch_response.endpoint}"
assert (
create_batch_response.input_file_id == batch_input_file_id
), f"Failed to create batch, expected input_file_id to be {batch_input_file_id} but got {create_batch_response.input_file_id}"
retrieved_batch = litellm.retrieve_batch(
batch_id=create_batch_response.id, custom_llm_provider="openai"
)
print("retrieved batch=", retrieved_batch)
# just assert that we retrieved a non None batch
assert retrieved_batch.id == create_batch_response.id
file_content = litellm.file_content(
file_id=batch_input_file_id, custom_llm_provider="openai"
)
result = file_content.content
result_file_name = "batch_job_results_furniture.jsonl"
with open(result_file_name, "wb") as file:
file.write(result)
pass
@pytest.mark.asyncio()
async def test_async_create_batch():
"""
1. Create File for Batch completion
2. Create Batch Request
3. Retrieve the specific batch
"""
print("Testing async create batch")
file_name = "openai_batch_completions.jsonl"
_current_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(_current_dir, file_name)
file_obj = await litellm.acreate_file(
file=open(file_path, "rb"),
purpose="batch",
custom_llm_provider="openai",
)
print("Response from creating file=", file_obj)
batch_input_file_id = file_obj.id
assert (
batch_input_file_id is not None
), "Failed to create file, expected a non null file_id but got {batch_input_file_id}"
create_batch_response = await litellm.acreate_batch(
completion_window="24h",
endpoint="/v1/chat/completions",
input_file_id=batch_input_file_id,
custom_llm_provider="openai",
metadata={"key1": "value1", "key2": "value2"},
)
print("response from litellm.create_batch=", create_batch_response)
assert (
create_batch_response.id is not None
), f"Failed to create batch, expected a non null batch_id but got {create_batch_response.id}"
assert (
create_batch_response.endpoint == "/v1/chat/completions"
), f"Failed to create batch, expected endpoint to be /v1/chat/completions but got {create_batch_response.endpoint}"
assert (
create_batch_response.input_file_id == batch_input_file_id
), f"Failed to create batch, expected input_file_id to be {batch_input_file_id} but got {create_batch_response.input_file_id}"
await asyncio.sleep(1)
retrieved_batch = await litellm.aretrieve_batch(
batch_id=create_batch_response.id, custom_llm_provider="openai"
)
print("retrieved batch=", retrieved_batch)
# just assert that we retrieved a non None batch
assert retrieved_batch.id == create_batch_response.id
# try to get file content for our original file
file_content = await litellm.afile_content(
file_id=batch_input_file_id, custom_llm_provider="openai"
)
print("file content = ", file_content)
# # write this file content to a file
# with open("file_content.json", "w") as f:
# json.dump(file_content, f)
def test_retrieve_batch():
pass
def test_cancel_batch():
pass
def test_list_batch():
pass

View file

@ -97,6 +97,18 @@ def test_databricks_optional_params():
assert "user" not in optional_params
def test_azure_ai_mistral_optional_params():
litellm.drop_params = True
optional_params = get_optional_params(
model="mistral-large-latest",
user="John",
custom_llm_provider="openai",
max_tokens=10,
temperature=0.2,
)
assert "user" not in optional_params
def test_azure_gpt_optional_params_gpt_vision():
# for OpenAI, Azure all extra params need to get passed as extra_body to OpenAI python. We assert we actually set extra_body here
optional_params = litellm.utils.get_optional_params(

View file

@ -19,6 +19,25 @@ import os, httpx
load_dotenv()
def test_router_sensitive_keys():
try:
router = Router(
model_list=[
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"api_key": "special-key",
},
"model_info": {"id": 12345},
},
],
)
except Exception as e:
print(f"error msg - {str(e)}")
assert "special-key" not in str(e)
@pytest.mark.parametrize("num_retries", [None, 2])
@pytest.mark.parametrize("max_retries", [None, 4])
def test_router_num_retries_init(num_retries, max_retries):

View file

@ -19,8 +19,141 @@ import os, httpx
load_dotenv()
@pytest.mark.parametrize("mode", ["all_responses", "fastest_response"])
@pytest.mark.asyncio
async def test_batch_completion_multiple_models():
async def test_batch_completion_multiple_models(mode):
litellm.set_verbose = True
router = litellm.Router(
model_list=[
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo",
},
},
{
"model_name": "groq-llama",
"litellm_params": {
"model": "groq/llama3-8b-8192",
},
},
]
)
if mode == "all_responses":
response = await router.abatch_completion(
models=["gpt-3.5-turbo", "groq-llama"],
messages=[
{"role": "user", "content": "is litellm becoming a better product ?"}
],
max_tokens=15,
)
print(response)
assert len(response) == 2
models_in_responses = []
for individual_response in response:
_model = individual_response["model"]
models_in_responses.append(_model)
# assert both models are different
assert models_in_responses[0] != models_in_responses[1]
elif mode == "fastest_response":
from openai.types.chat.chat_completion import ChatCompletion
response = await router.abatch_completion_fastest_response(
model="gpt-3.5-turbo, groq-llama",
messages=[
{"role": "user", "content": "is litellm becoming a better product ?"}
],
max_tokens=15,
)
ChatCompletion.model_validate(response.model_dump(), strict=True)
@pytest.mark.asyncio
async def test_batch_completion_fastest_response_unit_test():
"""
Unit test to confirm fastest response will always return the response which arrives earliest.
2 models -> 1 is cached, the other is a real llm api call => assert cached response always returned
"""
litellm.set_verbose = True
router = litellm.Router(
model_list=[
{
"model_name": "gpt-4",
"litellm_params": {
"model": "gpt-4",
},
"model_info": {"id": "1"},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo",
"mock_response": "This is a fake response",
},
"model_info": {"id": "2"},
},
]
)
response = await router.abatch_completion_fastest_response(
model="gpt-4, gpt-3.5-turbo",
messages=[
{"role": "user", "content": "is litellm becoming a better product ?"}
],
max_tokens=500,
)
assert response._hidden_params["model_id"] == "2"
assert response.choices[0].message.content == "This is a fake response"
print(f"response: {response}")
@pytest.mark.asyncio
async def test_batch_completion_fastest_response_streaming():
litellm.set_verbose = True
router = litellm.Router(
model_list=[
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo",
},
},
{
"model_name": "groq-llama",
"litellm_params": {
"model": "groq/llama3-8b-8192",
},
},
]
)
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
response = await router.abatch_completion_fastest_response(
model="gpt-3.5-turbo, groq-llama",
messages=[
{"role": "user", "content": "is litellm becoming a better product ?"}
],
max_tokens=15,
stream=True,
)
async for chunk in response:
ChatCompletionChunk.model_validate(chunk.model_dump(), strict=True)
@pytest.mark.asyncio
async def test_batch_completion_multiple_models_multiple_messages():
litellm.set_verbose = True
router = litellm.Router(
@ -43,18 +176,21 @@ async def test_batch_completion_multiple_models():
response = await router.abatch_completion(
models=["gpt-3.5-turbo", "groq-llama"],
messages=[
{"role": "user", "content": "is litellm becoming a better product ?"}
[{"role": "user", "content": "is litellm becoming a better product ?"}],
[{"role": "user", "content": "who is this"}],
],
max_tokens=15,
)
print(response)
print("response from batches =", response)
assert len(response) == 2
assert len(response[0]) == 2
assert isinstance(response[0][0], litellm.ModelResponse)
models_in_responses = []
for individual_response in response:
_model = individual_response["model"]
models_in_responses.append(_model)
# models_in_responses = []
# for individual_response in response:
# _model = individual_response["model"]
# models_in_responses.append(_model)
# assert both models are different
assert models_in_responses[0] != models_in_responses[1]
# # assert both models are different
# assert models_in_responses[0] != models_in_responses[1]

View file

@ -3,7 +3,7 @@
import sys, os, asyncio
import traceback
import time, pytest
import time, pytest, uuid
from pydantic import BaseModel
from typing import Tuple
@ -241,203 +241,138 @@ def test_completion_azure_stream_content_filter_no_delta():
"""
try:
chunks = [
{
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{
"delta": {
"content": "",
"role": "assistant"
},
"finish_reason": None,
"index": 0
"delta": {"content": "", "role": "assistant"},
"finish_reason": None,
"index": 0,
}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a"
},
{
"system_fingerprint": "fp_5f4bad809a",
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{"delta": {"content": "This"}, "finish_reason": None, "index": 0}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a",
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{"delta": {"content": " is"}, "finish_reason": None, "index": 0}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a",
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{"delta": {"content": " a"}, "finish_reason": None, "index": 0}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a",
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{"delta": {"content": " dummy"}, "finish_reason": None, "index": 0}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a",
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{
"delta": {
"content": "This"
},
"finish_reason": None,
"index": 0
"delta": {"content": " response"},
"finish_reason": None,
"index": 0,
}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a"
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{
"delta": {
"content": " is"
},
"finish_reason": None,
"index": 0
}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a"
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{
"delta": {
"content": " a"
},
"finish_reason": None,
"index": 0
}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a"
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{
"delta": {
"content": " dummy"
},
"finish_reason": None,
"index": 0
}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a"
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{
"delta": {
"content": " response"
},
"finish_reason": None,
"index": 0
}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a"
},
{
"system_fingerprint": "fp_5f4bad809a",
},
{
"id": "",
"choices": [
{
"finish_reason": None,
"index": 0,
"content_filter_offsets": {
"check_offset": 35159,
"start_offset": 35159,
"end_offset": 36150
},
"content_filter_results": {
"hate": {
"filtered": False,
"severity": "safe"
"finish_reason": None,
"index": 0,
"content_filter_offsets": {
"check_offset": 35159,
"start_offset": 35159,
"end_offset": 36150,
},
"self_harm": {
"filtered": False,
"severity": "safe"
"content_filter_results": {
"hate": {"filtered": False, "severity": "safe"},
"self_harm": {"filtered": False, "severity": "safe"},
"sexual": {"filtered": False, "severity": "safe"},
"violence": {"filtered": False, "severity": "safe"},
},
"sexual": {
"filtered": False,
"severity": "safe"
},
"violence": {
"filtered": False,
"severity": "safe"
}
}
}
],
"created": 0,
"model": "",
"object": ""
},
{
"object": "",
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{
"delta": {
"content": "."
},
"finish_reason": None,
"index": 0
}
{"delta": {"content": "."}, "finish_reason": None, "index": 0}
],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a"
},
{
"system_fingerprint": "fp_5f4bad809a",
},
{
"id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
"choices": [
{
"delta": {},
"finish_reason": "stop",
"index": 0
}
],
"choices": [{"delta": {}, "finish_reason": "stop", "index": 0}],
"created": 1716563849,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_5f4bad809a"
},
{
"system_fingerprint": "fp_5f4bad809a",
},
{
"id": "",
"choices": [
{
"finish_reason": None,
"index": 0,
"content_filter_offsets": {
"check_offset": 36150,
"start_offset": 36060,
"end_offset": 37029
},
"content_filter_results": {
"hate": {
"filtered": False,
"severity": "safe"
"finish_reason": None,
"index": 0,
"content_filter_offsets": {
"check_offset": 36150,
"start_offset": 36060,
"end_offset": 37029,
},
"self_harm": {
"filtered": False,
"severity": "safe"
"content_filter_results": {
"hate": {"filtered": False, "severity": "safe"},
"self_harm": {"filtered": False, "severity": "safe"},
"sexual": {"filtered": False, "severity": "safe"},
"violence": {"filtered": False, "severity": "safe"},
},
"sexual": {
"filtered": False,
"severity": "safe"
},
"violence": {
"filtered": False,
"severity": "safe"
}
}
}
],
"created": 0,
"model": "",
"object": ""
}
"object": "",
},
]
chunk_list = []
@ -1449,29 +1384,68 @@ def test_bedrock_claude_3_streaming():
pytest.fail(f"Error occurred: {e}")
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_claude_3_streaming_finish_reason():
async def test_claude_3_streaming_finish_reason(sync_mode):
try:
import threading
litellm.set_verbose = True
messages = [
{"role": "system", "content": "Be helpful"},
{"role": "user", "content": "What do you know?"},
]
response: ModelResponse = await litellm.acompletion( # type: ignore
model="claude-3-opus-20240229",
messages=messages,
stream=True,
max_tokens=10,
)
complete_response = ""
# Add any assertions here to check the response
num_finish_reason = 0
async for chunk in response:
print(f"chunk: {chunk}")
if isinstance(chunk, ModelResponse):
if chunk.choices[0].finish_reason is not None:
num_finish_reason += 1
assert num_finish_reason == 1
def sync_test_streaming():
response: litellm.CustomStreamWrapper = litellm.completion(  # type: ignore
model="claude-3-opus-20240229",
messages=messages,
stream=True,
max_tokens=10,
)
complete_response = ""
# Add any assertions here to check the response
num_finish_reason = 0
for chunk in response:
print(f"chunk: {chunk}")
if isinstance(chunk, ModelResponse):
if chunk.choices[0].finish_reason is not None:
num_finish_reason += 1
assert num_finish_reason == 1
async def test_streaming():
response: litellm.CustomStreamWrapper = await litellm.acompletion( # type: ignore
model="claude-3-opus-20240229",
messages=messages,
stream=True,
max_tokens=10,
)
complete_response = ""
# Add any assertions here to check the response
num_finish_reason = 0
async for chunk in response:
print(f"chunk: {chunk}")
if isinstance(chunk, ModelResponse):
if chunk.choices[0].finish_reason is not None:
num_finish_reason += 1
assert num_finish_reason == 1
tasks = []
for _ in range(2):
if sync_mode == False:
tasks.append(test_streaming())
else:
thread = threading.Thread(target=sync_test_streaming)
thread.start()
tasks.append(thread)
if sync_mode == False:
await asyncio.gather(*tasks)
else:
# Wait for all threads to complete
for thread in tasks:
thread.join()
except RateLimitError:
pass
except Exception as e:

View file

@ -1,49 +1,35 @@
# Commented out for now - since traceloop break ci/cd
# import sys
# import os
# import io, asyncio
import sys
import os
import time
import pytest
import litellm
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
from traceloop.sdk import Traceloop
# sys.path.insert(0, os.path.abspath('../..'))
# from litellm import completion
# import litellm
# litellm.num_retries = 3
# litellm.success_callback = [""]
# import time
# import pytest
# from traceloop.sdk import Traceloop
# Traceloop.init(app_name="test-litellm", disable_batch=True)
sys.path.insert(0, os.path.abspath("../.."))
# def test_traceloop_logging():
# try:
# litellm.set_verbose = True
# response = litellm.completion(
# model="gpt-3.5-turbo",
# messages=[{"role": "user", "content":"This is a test"}],
# max_tokens=1000,
# temperature=0.7,
# timeout=5,
# )
# print(f"response: {response}")
# except Exception as e:
# pytest.fail(f"An exception occurred - {e}")
# # test_traceloop_logging()
@pytest.fixture()
def exporter():
exporter = InMemorySpanExporter()
Traceloop.init(
app_name="test_litellm",
disable_batch=True,
exporter=exporter,
)
litellm.success_callback = ["traceloop"]
litellm.set_verbose = True
return exporter
# # def test_traceloop_logging_async():
# # try:
# # litellm.set_verbose = True
# # async def test_acompletion():
# # return await litellm.acompletion(
# # model="gpt-3.5-turbo",
# # messages=[{"role": "user", "content":"This is a test"}],
# # max_tokens=1000,
# # temperature=0.7,
# # timeout=5,
# # )
# # response = asyncio.run(test_acompletion())
# # print(f"response: {response}")
# # except Exception as e:
# # pytest.fail(f"An exception occurred - {e}")
# # test_traceloop_logging_async()
@pytest.mark.parametrize("model", ["claude-instant-1.2", "gpt-3.5-turbo"])
def test_traceloop_logging(exporter, model):
litellm.completion(
model=model,
messages=[{"role": "user", "content": "This is a test"}],
max_tokens=1000,
temperature=0.7,
timeout=5,
)

View file

@ -1,6 +1,6 @@
from typing import List, Optional, Union, Iterable
from pydantic import BaseModel, validator
from pydantic import BaseModel, ConfigDict, validator
from typing_extensions import Literal, Required, TypedDict
@ -191,6 +191,4 @@ class CompletionRequest(BaseModel):
api_key: Optional[str] = None
model_list: Optional[List[str]] = None
class Config:
extra = "allow"
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=(), extra="allow")

View file

@ -1,6 +1,6 @@
from typing import List, Optional, Union
from pydantic import BaseModel, validator
from pydantic import BaseModel, ConfigDict
class EmbeddingRequest(BaseModel):
@ -18,6 +18,4 @@ class EmbeddingRequest(BaseModel):
litellm_logging_obj: Optional[dict] = None
logger_fn: Optional[str] = None
class Config:
# allow kwargs
extra = "allow"
model_config = ConfigDict(extra="allow")

View file

@ -6,9 +6,8 @@ from typing import (
Literal,
Iterable,
)
from typing_extensions import override, Required
from typing_extensions import override, Required, Dict
from pydantic import BaseModel
from openai.types.beta.threads.message_content import MessageContent
from openai.types.beta.threads.message import Message as OpenAIMessage
from openai.types.beta.thread_create_params import (
@ -18,8 +17,23 @@ from openai.types.beta.assistant_tool_param import AssistantToolParam
from openai.types.beta.threads.run import Run
from openai.types.beta.assistant import Assistant
from openai.pagination import SyncCursorPage
from os import PathLike
from openai.types import FileObject, Batch
from openai._legacy_response import HttpxBinaryResponseContent
from typing import TypedDict, List, Optional, Tuple, Mapping, IO
from typing import TypedDict, List, Optional
FileContent = Union[IO[bytes], bytes, PathLike]
FileTypes = Union[
# file (or bytes)
FileContent,
# (filename, file (or bytes))
Tuple[Optional[str], FileContent],
# (filename, file (or bytes), content_type)
Tuple[Optional[str], FileContent, Optional[str]],
# (filename, file (or bytes), content_type, headers)
Tuple[Optional[str], FileContent, Optional[str], Mapping[str, str]],
]
class NotGiven:
@ -146,3 +160,96 @@ class Thread(BaseModel):
object: Literal["thread"]
"""The object type, which is always `thread`."""
# OpenAI Files Types
class CreateFileRequest(TypedDict, total=False):
"""
CreateFileRequest
Used by Assistants API, Batches API, and Fine-Tunes API
Required Params:
file: FileTypes
purpose: Literal['assistants', 'batch', 'fine-tune']
Optional Params:
extra_headers: Optional[Dict[str, str]]
extra_body: Optional[Dict[str, str]] = None
timeout: Optional[float] = None
"""
file: FileTypes
purpose: Literal["assistants", "batch", "fine-tune"]
extra_headers: Optional[Dict[str, str]]
extra_body: Optional[Dict[str, str]]
timeout: Optional[float]
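A minimal sketch of populating this TypedDict; the file name, inline bytes, and content type are illustrative, and either `FileTypes` form defined above works for `file`:

```python
# assumes CreateFileRequest (defined above) is in scope

# open file handle (IO[bytes]) form
from_handle: CreateFileRequest = {
    "file": open("openai_batch_completions.jsonl", "rb"),
    "purpose": "batch",
}

# (filename, file bytes, content_type) tuple form
from_tuple: CreateFileRequest = {
    "file": ("batch_input.jsonl", b'{"custom_id": "request-1"}', "application/jsonl"),
    "purpose": "batch",
    "timeout": 600,
}
```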
class FileContentRequest(TypedDict, total=False):
"""
FileContentRequest
Used by Assistants API, Batches API, and Fine-Tunes API
Required Params:
file_id: str
Optional Params:
extra_headers: Optional[Dict[str, str]]
extra_body: Optional[Dict[str, str]] = None
timeout: Optional[float] = None
"""
file_id: str
extra_headers: Optional[Dict[str, str]]
extra_body: Optional[Dict[str, str]]
timeout: Optional[float]
# OpenAI Batches Types
class CreateBatchRequest(TypedDict, total=False):
"""
CreateBatchRequest
"""
completion_window: Literal["24h"]
endpoint: Literal["/v1/chat/completions", "/v1/embeddings"]
input_file_id: str
metadata: Optional[Dict[str, str]]
extra_headers: Optional[Dict[str, str]]
extra_body: Optional[Dict[str, str]]
timeout: Optional[float]
class RetrieveBatchRequest(TypedDict, total=False):
"""
RetrieveBatchRequest
"""
batch_id: str
extra_headers: Optional[Dict[str, str]]
extra_body: Optional[Dict[str, str]]
timeout: Optional[float]
class CancelBatchRequest(TypedDict, total=False):
"""
CancelBatchRequest
"""
batch_id: str
extra_headers: Optional[Dict[str, str]]
extra_body: Optional[Dict[str, str]]
timeout: Optional[float]
class ListBatchRequest(TypedDict, total=False):
"""
ListBatchRequest - List your organization's batches
Calls https://api.openai.com/v1/batches
"""
after: Union[str, NotGiven]
limit: Union[int, NotGiven]
extra_headers: Optional[Dict[str, str]]
extra_body: Optional[Dict[str, str]]
timeout: Optional[float]

View file

@ -1,12 +1,12 @@
"""
litellm.Router Types - includes RouterConfig, UpdateRouterConfig, ModelInfo etc
litellm.Router Types - includes RouterConfig, UpdateRouterConfig, ModelInfo etc
"""
from typing import List, Optional, Union, Dict, Tuple, Literal, TypedDict
import uuid
import enum
import httpx
from pydantic import BaseModel, Field
from pydantic import BaseModel, ConfigDict, Field
import datetime
from .completion import CompletionRequest
from .embedding import EmbeddingRequest
@ -18,8 +18,7 @@ class ModelConfig(BaseModel):
tpm: int
rpm: int
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class RouterConfig(BaseModel):
@ -50,8 +49,7 @@ class RouterConfig(BaseModel):
"latency-based-routing",
] = "simple-shuffle"
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class UpdateRouterConfig(BaseModel):
@ -71,17 +69,14 @@ class UpdateRouterConfig(BaseModel):
fallbacks: Optional[List[dict]] = None
context_window_fallbacks: Optional[List[dict]] = None
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class ModelInfo(BaseModel):
id: Optional[
str
] # Allow id to be optional on input, but it will always be present as a str in the model instance
db_model: bool = (
False # used for proxy - to separate models which are stored in the db vs. config.
)
db_model: bool = False # used for proxy - to separate models which are stored in the db vs. config.
updated_at: Optional[datetime.datetime] = None
updated_by: Optional[str] = None
@ -99,8 +94,7 @@ class ModelInfo(BaseModel):
id = str(id)
super().__init__(id=id, **params)
class Config:
extra = "allow"
model_config = ConfigDict(extra="allow")
def __contains__(self, key):
# Define custom behavior for the 'in' operator
@ -155,6 +149,8 @@ class GenericLiteLLMParams(BaseModel):
input_cost_per_second: Optional[float] = None
output_cost_per_second: Optional[float] = None
model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
def __init__(
self,
custom_llm_provider: Optional[str] = None,
@ -184,7 +180,7 @@ class GenericLiteLLMParams(BaseModel):
output_cost_per_token: Optional[float] = None,
input_cost_per_second: Optional[float] = None,
output_cost_per_second: Optional[float] = None,
**params
**params,
):
args = locals()
args.pop("max_retries", None)
@ -195,10 +191,6 @@ class GenericLiteLLMParams(BaseModel):
max_retries = int(max_retries) # cast to int
super().__init__(max_retries=max_retries, **args, **params)
class Config:
extra = "allow"
arbitrary_types_allowed = True
def __contains__(self, key):
# Define custom behavior for the 'in' operator
return hasattr(self, key)
@ -222,6 +214,7 @@ class LiteLLM_Params(GenericLiteLLMParams):
"""
model: str
model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
def __init__(
self,
@ -245,7 +238,7 @@ class LiteLLM_Params(GenericLiteLLMParams):
aws_access_key_id: Optional[str] = None,
aws_secret_access_key: Optional[str] = None,
aws_region_name: Optional[str] = None,
**params
**params,
):
args = locals()
args.pop("max_retries", None)
@ -256,10 +249,6 @@ class LiteLLM_Params(GenericLiteLLMParams):
max_retries = int(max_retries) # cast to int
super().__init__(max_retries=max_retries, **args, **params)
class Config:
extra = "allow"
arbitrary_types_allowed = True
def __contains__(self, key):
# Define custom behavior for the 'in' operator
return hasattr(self, key)
@ -288,8 +277,7 @@ class updateDeployment(BaseModel):
litellm_params: Optional[updateLiteLLMParams] = None
model_info: Optional[ModelInfo] = None
class Config:
protected_namespaces = ()
model_config = ConfigDict(protected_namespaces=())
class LiteLLMParamsTypedDict(TypedDict, total=False):
@ -338,12 +326,14 @@ class Deployment(BaseModel):
litellm_params: LiteLLM_Params
model_info: ModelInfo
model_config = ConfigDict(extra="allow", protected_namespaces=())
def __init__(
self,
model_name: str,
litellm_params: LiteLLM_Params,
model_info: Optional[Union[ModelInfo, dict]] = None,
**params
**params,
):
if model_info is None:
model_info = ModelInfo()
@ -353,7 +343,7 @@ class Deployment(BaseModel):
model_info=model_info,
model_name=model_name,
litellm_params=litellm_params,
**params
**params,
)
def to_json(self, **kwargs):
@ -363,10 +353,6 @@ class Deployment(BaseModel):
# if using pydantic v1
return self.dict(**kwargs)
class Config:
extra = "allow"
protected_namespaces = ()
def __contains__(self, key):
# Define custom behavior for the 'in' operator
return hasattr(self, key)

View file

@ -18,7 +18,7 @@ from functools import wraps, lru_cache
import datetime, time
import tiktoken
import uuid
from pydantic import BaseModel
from pydantic import BaseModel, ConfigDict
import aiohttp
import textwrap
import logging
@ -337,9 +337,7 @@ class HiddenParams(OpenAIObject):
model_id: Optional[str] = None # used in Router for individual deployments
api_base: Optional[str] = None # returns api base used for making completion call
class Config:
extra = "allow"
protected_namespaces = ()
model_config = ConfigDict(extra="allow", protected_namespaces=())
def get(self, key, default=None):
# Custom .get() method to access attributes with a default value if the attribute doesn't exist
@ -1136,6 +1134,8 @@ class CallTypes(Enum):
amoderation = "amoderation"
atranscription = "atranscription"
transcription = "transcription"
aspeech = "aspeech"
speech = "speech"
# Logging function -> log the exact model details + what's being sent | Non-BlockingP
@ -2027,6 +2027,7 @@ class Logging:
response_obj=result,
start_time=start_time,
end_time=end_time,
user_id=kwargs.get("user", None),
print_verbose=print_verbose,
)
if callback == "s3":
@ -2598,6 +2599,17 @@ class Logging:
level="ERROR",
kwargs=self.model_call_details,
)
if callback == "traceloop":
traceloopLogger.log_event(
start_time=start_time,
end_time=end_time,
response_obj=None,
user_id=kwargs.get("user", None),
print_verbose=print_verbose,
status_message=str(exception),
level="ERROR",
kwargs=self.model_call_details,
)
if callback == "prometheus":
global prometheusLogger
verbose_logger.debug("reaches prometheus for success logging!")
@ -2993,6 +3005,10 @@ def function_setup(
):
_file_name: BinaryIO = args[1] if len(args) > 1 else kwargs["file"]
messages = "audio_file"
elif (
call_type == CallTypes.aspeech.value or call_type == CallTypes.speech.value
):
messages = kwargs.get("input", "speech")
stream = True if "stream" in kwargs and kwargs["stream"] == True else False
logging_obj = Logging(
model=model,
@ -3334,6 +3350,8 @@ def client(original_function):
return result
elif "atranscription" in kwargs and kwargs["atranscription"] == True:
return result
elif "aspeech" in kwargs and kwargs["aspeech"] == True:
return result
### POST-CALL RULES ###
post_call_processing(original_response=result, model=model or None)
@ -5740,6 +5758,8 @@ def get_optional_params(
optional_params["stream"] = stream
if temperature is not None:
optional_params["temperature"] = temperature
if seed is not None:
optional_params["seed"] = seed
if top_p is not None:
optional_params["top_p"] = top_p
if frequency_penalty is not None:
@ -6392,6 +6412,8 @@ def get_supported_openai_params(
return ["stream", "temperature", "max_tokens"]
elif model.startswith("mistral"):
return ["max_tokens", "temperature", "stop", "top_p", "stream"]
elif custom_llm_provider == "ollama":
return litellm.OllamaConfig().get_supported_openai_params()
elif custom_llm_provider == "ollama_chat":
return litellm.OllamaChatConfig().get_supported_openai_params()
elif custom_llm_provider == "anthropic":
@ -6561,16 +6583,6 @@ def get_supported_openai_params(
]
elif custom_llm_provider == "cloudflare":
return ["max_tokens", "stream"]
elif custom_llm_provider == "ollama":
return [
"max_tokens",
"stream",
"top_p",
"temperature",
"frequency_penalty",
"stop",
"response_format",
]
elif custom_llm_provider == "nlp_cloud":
return [
"max_tokens",

View file

@ -1265,8 +1265,8 @@
"max_tokens": 4096,
"max_input_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.0000075,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000075,
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.38.11"
version = "1.39.5"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -79,8 +79,10 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.38.11"
version = "1.39.5"
version_files = [
"pyproject.toml:^version"
]
[tool.mypy]
plugins = "pydantic.mypy"

View file

@ -12,6 +12,7 @@ sys.path.insert(
0, os.path.abspath("../")
) # Adds the parent directory to the system path
import litellm
from litellm.proxy._types import LitellmUserRoles
async def generate_team(
@ -731,7 +732,9 @@ async def test_key_delete_ui():
# generate a admin UI key
team = await generate_team(session=session)
admin_ui_key = await generate_user(session=session, user_role="proxy_admin")
admin_ui_key = await generate_user(
session=session, user_role=LitellmUserRoles.PROXY_ADMIN.value
)
print(
"trying to delete key=",
key,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[418],{33786:function(e,n,u){Promise.resolve().then(u.bind(u,87494))},87494:function(e,n,u){"use strict";u.r(n),u.d(n,{default:function(){return f}});var t=u(3827),s=u(64090),r=u(47907),c=u(41134);function f(){let e=(0,r.useSearchParams)().get("key"),[n,u]=(0,s.useState)(null);return(0,s.useEffect)(()=>{e&&u(e)},[e]),(0,t.jsx)(c.Z,{accessToken:n,publicPage:!0,premiumUser:!1})}}},function(e){e.O(0,[359,134,971,69,744],function(){return e(e.s=33786)}),_N_E=e.O()}]);

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/5d93d4a9fa59d72f.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/33354d8285fe572e.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[39712,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"608\",\"static/chunks/608-d128caa3cfe973c1.js\",\"931\",\"static/chunks/app/page-76d278f96a0e9768.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"D_ZUmMtLMPSa4aQQUJtKt\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-766a329236c9a3f0.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-766a329236c9a3f0.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/33354d8285fe572e.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[45014,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"359\",\"static/chunks/359-f105a7fb61fe8110.js\",\"440\",\"static/chunks/440-b9a05f116e1a696d.js\",\"134\",\"static/chunks/134-4a7b43f992182f2c.js\",\"931\",\"static/chunks/app/page-f610596e5fb3cce4.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/33354d8285fe572e.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"PcGFjo5-03lHREJ3E0k6y\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px 
solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[39712,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","608","static/chunks/608-d128caa3cfe973c1.js","931","static/chunks/app/page-76d278f96a0e9768.js"],""]
3:I[45014,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","359","static/chunks/359-f105a7fb61fe8110.js","440","static/chunks/440-b9a05f116e1a696d.js","134","static/chunks/134-4a7b43f992182f2c.js","931","static/chunks/app/page-f610596e5fb3cce4.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["D_ZUmMtLMPSa4aQQUJtKt",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5d93d4a9fa59d72f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["PcGFjo5-03lHREJ3E0k6y",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/33354d8285fe572e.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,7 @@
2:I[77831,[],""]
3:I[87494,["359","static/chunks/359-f105a7fb61fe8110.js","134","static/chunks/134-4a7b43f992182f2c.js","418","static/chunks/app/model_hub/page-aa3c10cf9bb31255.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["PcGFjo5-03lHREJ3E0k6y",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/33354d8285fe572e.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -18,7 +18,7 @@ import Usage from "../components/usage";
import { jwtDecode } from "jwt-decode";
import { Typography } from "antd";
export function formatUserRole(userRole: string) {
function formatUserRole(userRole: string) {
if (!userRole) {
return "Undefined Role";
}

View file

@ -58,6 +58,7 @@ import {
User,
setCallbacksCall,
invitationCreateCall,
getPossibleUserRoles,
} from "./networking";
const AdminPanel: React.FC<AdminPanelProps> = ({
@ -83,6 +84,9 @@ const AdminPanel: React.FC<AdminPanelProps> = ({
useState(false);
const router = useRouter();
const [baseUrl, setBaseUrl] = useState("");
const [isInstructionsModalVisible, setIsInstructionsModalVisible] = useState(false);
const [possibleUIRoles, setPossibleUIRoles] = useState<null | Record<string, Record<string, string>>>(null);
let nonSssoUrl;
try {
@ -163,6 +167,9 @@ const AdminPanel: React.FC<AdminPanelProps> = ({
console.log(`proxy admins: ${proxyAdmins}`);
console.log(`combinedList: ${combinedList}`);
setAdmins(combinedList);
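      // load the role -> UI label mapping used when rendering member roles below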
const availableUserRoles = await getPossibleUserRoles(accessToken);
setPossibleUIRoles(availableUserRoles);
}
};
@ -435,7 +442,7 @@ const AdminPanel: React.FC<AdminPanelProps> = ({
? member["user_id"]
: null}
</TableCell>
<TableCell>{member["user_role"]}</TableCell>
<TableCell> {possibleUIRoles?.[member?.user_role]?.ui_label || "-"}</TableCell>
<TableCell>
<Icon
icon={PencilAltIcon}

View file

@ -149,6 +149,12 @@ const ChatUI: React.FC<ChatUIProps> = ({
});
};
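  // submit the chat input when the user presses Enter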
const handleKeyDown = (event: React.KeyboardEvent<HTMLInputElement>) => {
if (event.key === 'Enter') {
handleSendMessage();
}
};
const handleSendMessage = async () => {
if (inputMessage.trim() === "") return;
@ -260,6 +266,7 @@ const ChatUI: React.FC<ChatUIProps> = ({
type="text"
value={inputMessage}
onChange={(e) => setInputMessage(e.target.value)}
              onKeyDown={handleKeyDown} // send the message when Enter is pressed
placeholder="Type your message..."
/>
<Button

View file

@ -0,0 +1,138 @@
import { useEffect, useState } from 'react';
import {
Dialog,
DialogPanel,
TextInput,
Button,
Select,
SelectItem,
Text,
Title,
Subtitle,
} from '@tremor/react';
import {
Button as Button2,
Modal,
Form,
Input,
Select as Select2,
InputNumber,
message,
} from "antd";
interface EditUserModalProps {
visible: boolean;
possibleUIRoles: null | Record<string, Record<string, string>>;
onCancel: () => void;
user: any;
onSubmit: (data: any) => void;
}
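// Modal form for editing an existing user's email, role, spend, and max budget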
const EditUserModal: React.FC<EditUserModalProps> = ({ visible, possibleUIRoles, onCancel, user, onSubmit }) => {
const [editedUser, setEditedUser] = useState(user);
const [form] = Form.useForm();
useEffect(() => {
form.resetFields();
}, [user]);
const handleCancel = async () => {
form.resetFields();
onCancel();
};
const handleEditSubmit = async (formValues: Record<string, any>) => {
// Call API to update team with teamId and values
onSubmit(formValues);
form.resetFields();
onCancel();
};
if (!user) {
return null;
}
return (
<Modal
visible={visible}
onCancel={handleCancel}
footer={null}
title={"Edit User " + user.user_id}
width={1000}
>
<Form
form={form}
onFinish={handleEditSubmit}
initialValues={user} // Pass initial values here
labelCol={{ span: 8 }}
wrapperCol={{ span: 16 }}
labelAlign="left"
>
<>
<Form.Item
className="mt-8"
label="User Email"
tooltip="Email of the User"
name="user_email">
<TextInput />
</Form.Item>
<Form.Item
label="user_id"
name="user_id"
hidden={true}
>
<TextInput />
</Form.Item>
<Form.Item
label="User Role"
name="user_role"
>
<Select2>
{possibleUIRoles &&
Object.entries(possibleUIRoles).map(([role, { ui_label, description }]) => (
<SelectItem key={role} value={role} title={ui_label}>
<div className='flex'>
{ui_label} <p className="ml-2" style={{ color: "gray", fontSize: "12px" }}>{description}</p>
</div>
</SelectItem>
))}
</Select2>
</Form.Item>
<Form.Item
label="Spend (USD)"
name="spend"
tooltip="(float) - Spend of all LLM calls completed by this user"
>
<InputNumber min={0} step={1} />
</Form.Item>
<Form.Item
label="User Budget (USD)"
name="max_budget"
tooltip="(float) - Maximum budget of this user"
>
<InputNumber min={0} step={1} />
</Form.Item>
<div style={{ textAlign: "right", marginTop: "10px" }}>
<Button2 htmlType="submit">Save</Button2>
</div>
</>
</Form>
</Modal>
);
};
export default EditUserModal;

View file

@ -79,7 +79,7 @@ const Sidebar: React.FC<SidebarProps> = ({
{userRole == "Admin" ? (
<Menu.Item key="5" onClick={() => setPage("users")}>
<Text>Users</Text>
<Text>Internal Users</Text>
</Menu.Item>
) : null}
@ -91,7 +91,7 @@ const Sidebar: React.FC<SidebarProps> = ({
{userRole == "Admin" ? (
<Menu.Item key="9" onClick={() => setPage("budgets")}>
<Text>Rate Limits</Text>
<Text>Budgets</Text>
</Menu.Item>
) : null}

View file

@ -49,6 +49,8 @@ import {
getCallbacksCall,
setCallbacksCall,
modelSettingsCall,
adminGlobalActivityExceptions,
adminGlobalActivityExceptionsPerDeployment,
} from "./networking";
import { BarChart, AreaChart } from "@tremor/react";
import {
@ -109,6 +111,13 @@ interface RetryPolicyObject {
[key: string]: { [retryPolicyKey: string]: number } | undefined;
}
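// aggregated rate-limit (429) exception counts returned by the /global/activity/exceptions endpoint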
interface GlobalExceptionActivityData {
sum_num_rate_limit_exceptions: number;
daily_data: { date: string; num_rate_limit_exceptions: number; }[];
}
//["OpenAI", "Azure OpenAI", "Anthropic", "Gemini (Google AI Studio)", "Amazon Bedrock", "OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)"]
interface ProviderFields {
@ -301,6 +310,9 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
useState<RetryPolicyObject | null>(null);
const [defaultRetry, setDefaultRetry] = useState<number>(0);
const [globalExceptionData, setGlobalExceptionData] = useState<GlobalExceptionActivityData>({} as GlobalExceptionActivityData);
const [globalExceptionPerDeployment, setGlobalExceptionPerDeployment] = useState<any[]>([]);
function formatCreatedAt(createdAt: string | null) {
if (createdAt) {
const date = new Date(createdAt);
@ -643,6 +655,29 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
dateValue.to?.toISOString()
);
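      // pull rate-limit exception activity for the selected model group and date range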
const dailyExceptions = await adminGlobalActivityExceptions(
accessToken,
dateValue.from?.toISOString().split('T')[0],
dateValue.to?.toISOString().split('T')[0],
_initial_model_group,
);
setGlobalExceptionData(dailyExceptions);
      const dailyExceptionsPerDeployment = await adminGlobalActivityExceptionsPerDeployment(
        accessToken,
        dateValue.from?.toISOString().split('T')[0],
        dateValue.to?.toISOString().split('T')[0],
        _initial_model_group,
      );
      setGlobalExceptionPerDeployment(dailyExceptionsPerDeployment);
      console.log("dailyExceptions:", dailyExceptions);
      console.log("dailyExceptionsPerDeployment:", dailyExceptionsPerDeployment);
console.log("slowResponses:", slowResponses);
setSlowResponsesData(slowResponses);
@ -905,6 +940,30 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
console.log("slowResponses:", slowResponses);
setSlowResponsesData(slowResponses);
if (modelGroup) {
const dailyExceptions = await adminGlobalActivityExceptions(
accessToken,
startTime?.toISOString().split('T')[0],
endTime?.toISOString().split('T')[0],
modelGroup,
);
setGlobalExceptionData(dailyExceptions);
        const dailyExceptionsPerDeployment = await adminGlobalActivityExceptionsPerDeployment(
          accessToken,
          startTime?.toISOString().split('T')[0],
          endTime?.toISOString().split('T')[0],
          modelGroup,
        );
        setGlobalExceptionPerDeployment(dailyExceptionsPerDeployment);
}
} catch (error) {
console.error("Failed to fetch model metrics", error);
}
@ -1475,7 +1534,8 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
)}
{selectedProvider != Providers.Bedrock &&
selectedProvider != Providers.Vertex_AI &&
dynamicProviderForm === undefined && (
(dynamicProviderForm === undefined ||
dynamicProviderForm.fields.length == 0) && (
<Form.Item
rules={[{ required: true, message: "Required" }]}
label="API Key"
@ -1777,18 +1837,110 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
</Card>
</Col>
</Grid>
<Card className="mt-4">
<Title>Exceptions per Model</Title>
<BarChart
className="h-72"
data={modelExceptions}
index="model"
categories={allExceptions}
stack={true}
colors={["indigo-300", "rose-200", "#ffcc33"]}
yAxisWidth={30}
/>
</Card>
<Grid numItems={1} className="gap-2 w-full mt-2">
<Card>
<Title>All Up Rate Limit Errors (429) for {selectedModelGroup}</Title>
<Grid numItems={1}>
<Col>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Num Rate Limit Errors { (globalExceptionData.sum_num_rate_limit_exceptions)}</Subtitle>
<BarChart
className="h-40"
data={globalExceptionData.daily_data}
index="date"
colors={['rose']}
categories={['num_rate_limit_exceptions']}
onValueChange={(v) => console.log(v)}
/>
</Col>
<Col>
{/* <BarChart
className="h-40"
data={modelExceptions}
index="model"
categories={allExceptions}
stack={true}
yAxisWidth={30}
/> */}
</Col>
</Grid>
</Card>
{
premiumUser ? (
<>
{globalExceptionPerDeployment.map((globalActivity, index) => (
<Card key={index}>
<Title>{globalActivity.api_base ? globalActivity.api_base : "Unknown API Base"}</Title>
<Grid numItems={1}>
<Col>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Num Rate Limit Errors (429) {(globalActivity.sum_num_rate_limit_exceptions)}</Subtitle>
<BarChart
className="h-40"
data={globalActivity.daily_data}
index="date"
colors={['rose']}
categories={['num_rate_limit_exceptions']}
onValueChange={(v) => console.log(v)}
/>
</Col>
</Grid>
</Card>
))}
</>
) :
<>
{globalExceptionPerDeployment && globalExceptionPerDeployment.length > 0 &&
globalExceptionPerDeployment.slice(0, 1).map((globalActivity, index) => (
<Card key={index}>
<Title> Rate Limit Errors by Deployment</Title>
<p className="mb-2 text-gray-500 italic text-[12px]">Upgrade to see exceptions for all deployments</p>
<Button variant="primary" className="mb-2">
<a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank">
Get Free Trial
</a>
</Button>
<Card>
<Title>{globalActivity.api_base}</Title>
<Grid numItems={1}>
<Col>
<Subtitle
style={{
fontSize: "15px",
fontWeight: "normal",
color: "#535452",
}}
>
Num Rate Limit Errors {(globalActivity.sum_num_rate_limit_exceptions)}
</Subtitle>
<BarChart
className="h-40"
data={globalActivity.daily_data}
index="date"
colors={['rose']}
categories={['num_rate_limit_exceptions']}
onValueChange={(v) => console.log(v)}
/>
</Col>
</Grid>
</Card>
</Card>
))}
</>
}
</Grid>
</TabPanel>
<TabPanel>
<div className="flex items-center">

View file

@ -39,7 +39,9 @@ const Navbar: React.FC<NavbarProps> = ({
// const userColors = require('./ui_colors.json') || {};
const isLocal = process.env.NODE_ENV === "development";
const proxyBaseUrl = isLocal ? "http://localhost:4000" : null;
const imageUrl = isLocal ? "http://localhost:4000/get_image" : "/get_image";
const logoutUrl = proxyBaseUrl ? `${proxyBaseUrl}` : `/`;
const items: MenuProps["items"] = [
{
@ -52,6 +54,14 @@ const Navbar: React.FC<NavbarProps> = ({
</>
),
},
{
key: "2",
label: (
<Link href={logoutUrl}>
<p>Logout</p>
</Link>
),
}
];
return (

View file

@ -1270,6 +1270,100 @@ export const adminGlobalActivityPerModel = async (
}
};
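// fetch daily rate-limit (429) exception counts for a model group from /global/activity/exceptions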
export const adminGlobalActivityExceptions = async (
accessToken: String,
startTime: String | undefined,
endTime: String | undefined,
modelGroup: String,
) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/global/activity/exceptions`
: `/global/activity/exceptions`;
if (startTime && endTime) {
url += `?start_date=${startTime}&end_date=${endTime}`;
}
    if (modelGroup) {
      // use "?" when no date-range params were appended above
      url += `${url.includes("?") ? "&" : "?"}model_group=${modelGroup}`;
    }
const requestOptions: {
method: string;
headers: {
Authorization: string;
};
} = {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
},
};
const response = await fetch(url, requestOptions);
if (!response.ok) {
      const errorData = await response.text();
      throw new Error(errorData || "Network response was not ok");
}
const data = await response.json();
console.log(data);
return data;
} catch (error) {
console.error("Failed to fetch spend data:", error);
throw error;
}
};
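// fetch per-deployment rate-limit (429) exception counts from /global/activity/exceptions/deployment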
export const adminGlobalActivityExceptionsPerDeployment = async (
accessToken: String,
startTime: String | undefined,
endTime: String | undefined,
modelGroup: String,
) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/global/activity/exceptions/deployment`
: `/global/activity/exceptions/deployment`;
if (startTime && endTime) {
url += `?start_date=${startTime}&end_date=${endTime}`;
}
    if (modelGroup) {
      // use "?" when no date-range params were appended above
      url += `${url.includes("?") ? "&" : "?"}model_group=${modelGroup}`;
    }
const requestOptions: {
method: string;
headers: {
Authorization: string;
};
} = {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
},
};
const response = await fetch(url, requestOptions);
if (!response.ok) {
      const errorData = await response.text();
      throw new Error(errorData || "Network response was not ok");
}
const data = await response.json();
console.log(data);
return data;
} catch (error) {
console.error("Failed to fetch spend data:", error);
throw error;
}
};
export const adminTopModelsCall = async (accessToken: String) => {
try {
let url = proxyBaseUrl
@ -1465,6 +1559,34 @@ export const userGetAllUsersCall = async (
}
};
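// fetch the role -> { ui_label, description } mapping from /user/available_roles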
export const getPossibleUserRoles = async (
accessToken: String,
) => {
try {
const url = proxyBaseUrl
? `${proxyBaseUrl}/user/available_roles`
: `/user/available_roles`;
const response = await fetch(url, {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
      const errorData = await response.text();
      throw new Error(errorData || "Network response was not ok");
}
const data = await response.json();
console.log("response from user/available_role", data);
return data;
    // data maps each role to its { ui_label, description } metadata for the UI
} catch (error) {
throw error;
}
};
export const teamCreateCall = async (
accessToken: string,
formValues: Record<string, any> // Assuming formValues is an object

View file

@ -188,6 +188,43 @@ const Settings: React.FC<SettingsPageProps> = ({
console.log("Selected values:", values);
};
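  // read the email alert environment variable inputs from the form and persist them via setCallbacksCall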
const handleSaveEmailSettings = () => {
if (!accessToken) {
return;
}
let updatedVariables: Record<string, string> = {};
alerts
.filter((alert) => alert.name === "email")
.forEach((alert) => {
Object.entries(alert.variables ?? {}).forEach(([key, value]) => {
const inputElement = document.querySelector(`input[name="${key}"]`) as HTMLInputElement;
if (inputElement && inputElement.value) {
updatedVariables[key] = inputElement?.value;
}
});
});
console.log("updatedVariables", updatedVariables);
    // only non-empty input values were collected above, so no further filtering is needed
const payload = {
general_settings: {
alerting: ["email"],
},
environment_variables: updatedVariables,
};
    try {
      setCallbacksCall(accessToken, payload);
      message.success("Email settings updated successfully");
    } catch (error) {
      message.error("Failed to update email settings: " + error, 20);
    }
  };
const handleSaveAlerts = () => {
if (!accessToken) {
return;
@ -369,7 +406,8 @@ const Settings: React.FC<SettingsPageProps> = ({
<TabList variant="line" defaultValue="1">
<Tab value="1">Logging Callbacks</Tab>
<Tab value="2">Alerting Types</Tab>
<Tab value="2">Alerting Settings</Tab>
<Tab value="3">Alerting Settings</Tab>
<Tab value="4">Email Alerts</Tab>
</TabList>
<TabPanels>
<TabPanel>
@ -526,6 +564,142 @@ const Settings: React.FC<SettingsPageProps> = ({
premiumUser={premiumUser}
/>
</TabPanel>
<TabPanel>
<Card>
<Title>Email Settings</Title>
<Text>
<a href="https://docs.litellm.ai/docs/proxy/email" target="_blank" style={{ color: "blue" }}> LiteLLM Docs: email alerts</a> <br/>
</Text>
<div className="flex w-full">
{alerts
.filter((alert) => alert.name === "email")
.map((alert, index) => (
<TableCell key={index}>
<ul>
<Grid numItems={2}>
{Object.entries(alert.variables ?? {}).map(([key, value]) => (
<li key={key} className="mx-2 my-2">
                        { premiumUser != true && (key === "EMAIL_LOGO_URL" || key === "EMAIL_SUPPORT_CONTACT") ? (
<div>
<a
href="https://forms.gle/W3U4PZpJGFHWtHyA9"
target="_blank"
>
<Text className="mt-2">
{" "}
{key}
</Text>
</a>
<TextInput
name={key}
defaultValue={value as string}
type="password"
disabled={true}
style={{ width: "400px" }}
/>
</div>
) : (
<div>
<Text className="mt-2">{key}</Text>
<TextInput
name={key}
defaultValue={value as string}
type="password"
style={{ width: "400px" }}
/>
</div>
)}
{/* Added descriptions for input fields */}
<p style={{ fontSize: "small", fontStyle: "italic" }}>
{key === "SMTP_HOST" && (
<div style={{ color: "gray" }}>
Enter the SMTP host address, e.g. `smtp.resend.com`
<span style={{ color: "red" }}> Required * </span>
</div>
)}
{key === "SMTP_PORT" && (
<div style={{ color: "gray" }}>
Enter the SMTP port number, e.g. `587`
<span style={{ color: "red" }}> Required * </span>
</div>
)}
{key === "SMTP_USERNAME" && (
<div style={{ color: "gray" }}>
Enter the SMTP username, e.g. `username`
<span style={{ color: "red" }}> Required * </span>
</div>
)}
{key === "SMTP_PASSWORD" && (
<span style={{ color: "red" }}> Required * </span>
)}
{key === "SMTP_SENDER_EMAIL" && (
<div style={{ color: "gray" }}>
Enter the sender email address, e.g. `sender@berri.ai`
<span style={{ color: "red" }}> Required * </span>
</div>
)}
{key === "TEST_EMAIL_ADDRESS" && (
<div style={{ color: "gray" }}>
                            Email address to send the `Test Email Alert` to, e.g. `info@berri.ai`
<span style={{ color: "red" }}> Required * </span>
</div>
)
}
{key === "EMAIL_LOGO_URL" && (
<div style={{ color: "gray" }}>
(Optional) Customize the Logo that appears in the email, pass a url to your logo
</div>
)
}
{key === "EMAIL_SUPPORT_CONTACT" && (
<div style={{ color: "gray" }}>
(Optional) Customize the support email address that appears in the email. Default is support@berri.ai
</div>
)
}
</p>
</li>
))}
</Grid>
</ul>
</TableCell>
))}
</div>
<Button
className="mt-2"
onClick={() => handleSaveEmailSettings()}
>
Save Changes
</Button>
<Button
onClick={() =>
serviceHealthCheck(accessToken, "email")
}
className="mx-2"
>
Test Email Alerts
</Button>
</Card>
</TabPanel>
</TabPanels>
</TabGroup>
</Grid>

View file

@ -162,6 +162,17 @@ const UsagePage: React.FC<UsagePageProps> = ({
console.log("keys in usage", keys);
console.log("premium user in usage", premiumUser);
function valueFormatterNumbers(number: number) {
const formatter = new Intl.NumberFormat('en-US', {
maximumFractionDigits: 0,
notation: 'compact',
compactDisplay: 'short',
});
return formatter.format(number);
}
const updateEndUserData = async (startTime: Date | undefined, endTime: Date | undefined, uiSelectedKey: string | null) => {
if (!startTime || !endTime || !accessToken) {
return;
@ -482,10 +493,11 @@ const UsagePage: React.FC<UsagePageProps> = ({
<Title>All Up</Title>
<Grid numItems={2}>
<Col>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>API Requests {globalActivity.sum_api_requests}</Subtitle>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>API Requests { valueFormatterNumbers(globalActivity.sum_api_requests)}</Subtitle>
<AreaChart
className="h-40"
data={globalActivity.daily_data}
valueFormatter={valueFormatterNumbers}
index="date"
colors={['cyan']}
categories={['api_requests']}
@ -494,10 +506,11 @@ const UsagePage: React.FC<UsagePageProps> = ({
</Col>
<Col>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Tokens {globalActivity.sum_total_tokens}</Subtitle>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Tokens { valueFormatterNumbers(globalActivity.sum_total_tokens)}</Subtitle>
<BarChart
className="h-40"
data={globalActivity.daily_data}
valueFormatter={valueFormatterNumbers}
index="date"
colors={['cyan']}
categories={['total_tokens']}
@ -517,24 +530,26 @@ const UsagePage: React.FC<UsagePageProps> = ({
<Title>{globalActivity.model}</Title>
<Grid numItems={2}>
<Col>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>API Requests {globalActivity.sum_api_requests}</Subtitle>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>API Requests {valueFormatterNumbers(globalActivity.sum_api_requests)}</Subtitle>
<AreaChart
className="h-40"
data={globalActivity.daily_data}
index="date"
colors={['cyan']}
categories={['api_requests']}
valueFormatter={valueFormatterNumbers}
onValueChange={(v) => console.log(v)}
/>
</Col>
<Col>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Tokens {globalActivity.sum_total_tokens}</Subtitle>
<Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Tokens {valueFormatterNumbers(globalActivity.sum_total_tokens)}</Subtitle>
<BarChart
className="h-40"
data={globalActivity.daily_data}
index="date"
colors={['cyan']}
categories={['total_tokens']}
valueFormatter={valueFormatterNumbers}
onValueChange={(v) => console.log(v)}
/>
</Col>
@ -565,7 +580,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
color: "#535452",
}}
>
API Requests {globalActivity.sum_api_requests}
API Requests {valueFormatterNumbers(globalActivity.sum_api_requests)}
</Subtitle>
<AreaChart
className="h-40"
@ -573,6 +588,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
index="date"
colors={['cyan']}
categories={['api_requests']}
valueFormatter={valueFormatterNumbers}
onValueChange={(v) => console.log(v)}
/>
</Col>
@ -584,13 +600,14 @@ const UsagePage: React.FC<UsagePageProps> = ({
color: "#535452",
}}
>
Tokens {globalActivity.sum_total_tokens}
Tokens {valueFormatterNumbers(globalActivity.sum_total_tokens)}
</Subtitle>
<BarChart
className="h-40"
data={globalActivity.daily_data}
index="date"
colors={['cyan']}
valueFormatter={valueFormatterNumbers}
categories={['total_tokens']}
onValueChange={(v) => console.log(v)}
/>

View file

@ -24,12 +24,22 @@ import {
Icon,
TextInput,
} from "@tremor/react";
import { userInfoCall } from "./networking";
import {
message,
} from "antd";
import { userInfoCall, userUpdateUserCall, getPossibleUserRoles } from "./networking";
import { Badge, BadgeDelta, Button } from "@tremor/react";
import RequestAccess from "./request_model_access";
import CreateUser from "./create_user_button";
import EditUserModal from "./edit_user";
import Paragraph from "antd/es/skeleton/Paragraph";
import InformationCircleIcon from "@heroicons/react/outline/InformationCircleIcon";
import {
PencilAltIcon,
InformationCircleIcon,
TrashIcon,
} from "@heroicons/react/outline";
interface ViewUserDashboardProps {
accessToken: string | null;
@ -55,8 +65,40 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
const [currentPage, setCurrentPage] = useState(0);
const [openDialogId, setOpenDialogId] = React.useState<null | number>(null);
const [selectedItem, setSelectedItem] = useState<null | any>(null);
const [editModalVisible, setEditModalVisible] = useState(false);
const [selectedUser, setSelectedUser] = useState(null);
const [possibleUIRoles, setPossibleUIRoles] = useState<Record<string, Record<string, string>>>({});
const defaultPageSize = 25;
const handleEditCancel = async () => {
setSelectedUser(null);
setEditModalVisible(false);
};
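  // persist the edited user via the API, then sync the change into the local table state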
const handleEditSubmit = async (editedUser: any) => {
console.log("inside handleEditSubmit:", editedUser);
if (!accessToken || !token || !userRole || !userID) {
return;
}
try {
await userUpdateUserCall(accessToken, editedUser, null);
message.success(`User ${editedUser.user_id} updated successfully`);
} catch (error) {
console.error("There was an error updating the user", error);
}
if (userData) {
const updatedUserData = userData.map((user) =>
user.user_id === editedUser.user_id ? editedUser : user
);
setUserData(updatedUserData);
}
setSelectedUser(null);
setEditModalVisible(false);
// Close the modal
};
useEffect(() => {
if (!accessToken || !token || !userRole || !userID) {
return;
@ -74,11 +116,16 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
);
console.log("user data response:", userDataResponse);
setUserData(userDataResponse);
const availableUserRoles = await getPossibleUserRoles(accessToken);
setPossibleUIRoles(availableUserRoles);
} catch (error) {
console.error("There was an error fetching the model data", error);
}
};
if (accessToken && token && userRole && userID) {
fetchData();
}
@ -126,14 +173,10 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
return (
<div style={{ width: "100%" }}>
<Grid className="gap-2 p-2 h-[80vh] w-full mt-8">
<Grid className="gap-2 p-2 h-[90vh] w-full mt-8">
<CreateUser userID={userID} accessToken={accessToken} teams={teams} />
<Card className="w-full mx-auto flex-auto overflow-y-auto max-h-[80vh] mb-4">
<Card className="w-full mx-auto flex-auto overflow-y-auto max-h-[90vh] mb-4">
<div className="mb-4 mt-1">
<Text>
These are Users on LiteLLM that created API Keys. Automatically
tracked by LiteLLM
</Text>
</div>
<TabGroup>
<TabPanels>
@ -143,25 +186,23 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
<TableRow>
<TableHeaderCell>User ID</TableHeaderCell>
<TableHeaderCell>User Email</TableHeaderCell>
<TableHeaderCell>User Models</TableHeaderCell>
<TableHeaderCell>Role</TableHeaderCell>
<TableHeaderCell>User Spend ($ USD)</TableHeaderCell>
<TableHeaderCell>User Max Budget ($ USD)</TableHeaderCell>
<TableHeaderCell>User API Key Aliases</TableHeaderCell>
<TableHeaderCell>API Keys</TableHeaderCell>
<TableHeaderCell></TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
{userData.map((user: any) => (
<TableRow key={user.user_id}>
<TableCell>{user.user_id}</TableCell>
<TableCell>{user.user_email}</TableCell>
<TableCell>{user.user_id || "-"}</TableCell>
<TableCell>{user.user_email || "-"}</TableCell>
<TableCell>
{user.models && user.models.length > 0
? user.models
: "All Models"}
{possibleUIRoles?.[user?.user_role]?.ui_label || "-"}
</TableCell>
<TableCell>
{user.spend ? user.spend?.toFixed(2) : 0}
{user.spend ? user.spend?.toFixed(2) : "-"}
</TableCell>
<TableCell>
{user.max_budget ? user.max_budget : "Unlimited"}
@ -173,9 +214,13 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
(key: any) => key !== null
).length > 0 ? (
<Badge size={"xs"} color={"indigo"}>
{user.key_aliases
.filter((key: any) => key !== null)
.join(", ")}
{
user.key_aliases.filter(
(key: any) => key !== null
).length
}
&nbsp;Keys
</Badge>
) : (
<Badge size={"xs"} color={"gray"}>
@ -188,12 +233,23 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
</Badge>
)}
{/* <Text>{user.key_aliases.filter(key => key !== null).length} Keys</Text> */}
{/* <Icon icon={InformationCircleIcon} onClick= {() => {
</Grid>
</TableCell>
<TableCell>
                        <Icon icon={PencilAltIcon} onClick={() => {
                          setSelectedUser(user)
                          setEditModalVisible(true)
                        }}>Edit User</Icon>
{/*
<Icon icon={TrashIcon} onClick= {() => {
setOpenDialogId(user.user_id)
setSelectedItem(user)
}}>View Keys</Icon> */}
</Grid>
</TableCell>
</TableRow>
))}
</TableBody>
@ -226,30 +282,16 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
</TabPanel>
</TabPanels>
</TabGroup>
<EditUserModal
visible={editModalVisible}
possibleUIRoles={possibleUIRoles}
onCancel={handleEditCancel}
user={selectedUser}
onSubmit={handleEditSubmit}
/>
</Card>
{renderPagination()}
</Grid>
{/* <Dialog
open={openDialogId !== null}
onClose={() => {
setOpenDialogId(null);
}}
>
<DialogPanel>
<div className="grid grid-cols-1 gap-6 sm:grid-cols-2 lg:grid-cols-3">
<Title>Key Aliases</Title>
<Text>
{selectedItem && selectedItem.key_aliases
? selectedItem.key_aliases.filter(key => key !== null).length > 0
? selectedItem.key_aliases.filter(key => key !== null).join(', ')
: 'No Keys'
: "No Keys"}
</Text>
</div>
</DialogPanel>
</Dialog> */}
</div>
);
};