forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_invite_link_flow_2
commit f9862be049
100 changed files with 5297 additions and 883 deletions
@@ -2,7 +2,7 @@ version: 4.3.4
 jobs:
   local_testing:
     docker:
-      - image: circleci/python:3.9
+      - image: circleci/python:3.11.8
     working_directory: ~/project

     steps:
@@ -43,7 +43,7 @@ jobs:
            pip install "langfuse==2.27.1"
            pip install "logfire==0.29.0"
            pip install numpydoc
-           pip install traceloop-sdk==0.18.2
+           pip install traceloop-sdk==0.21.1
            pip install openai
            pip install prisma
            pip install "httpx==0.24.1"
@@ -61,6 +61,7 @@ jobs:
            pip install prometheus-client==0.20.0
            pip install "pydantic==2.7.1"
            pip install "diskcache==5.6.1"
+           pip install "Pillow==10.3.0"
      - save_cache:
          paths:
            - ./venv
@@ -7,6 +7,5 @@ cohere
 redis
 anthropic
 orjson
-pydantic==1.10.14
+pydantic==2.7.1
 google-cloud-aiplatform==1.43.0
-redisvl==0.0.7 # semantic caching
@@ -1,3 +1,6 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
 # Batching Completion()
 LiteLLM allows you to:
 * Send many completion calls to 1 model
@@ -51,6 +54,9 @@ This makes parallel calls to the specified `models` and returns the first response

 Use this to reduce latency

+<Tabs>
+<TabItem value="sdk" label="SDK">
+
 ### Example Code
 ```python
 import litellm
@@ -68,8 +74,93 @@ response = batch_completion_models(
 print(result)
 ```

+</TabItem>
+<TabItem value="proxy" label="PROXY">
+
+[how to setup proxy config](#example-setup)
+
+Just pass a comma-separated string of model names and the flag `fastest_response=True`.
+
+<Tabs>
+<TabItem value="curl" label="curl">
+
+```bash
+curl -X POST 'http://localhost:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-d '{
+    "model": "gpt-4o, groq-llama", # 👈 Comma-separated models
+    "messages": [
+        {
+        "role": "user",
+        "content": "What's the weather like in Boston today?"
+        }
+    ],
+    "stream": true,
+    "fastest_response": true # 👈 FLAG
+}
+'
+```
+
+</TabItem>
+<TabItem value="openai" label="OpenAI SDK">
+
+```python
+import openai
+client = openai.OpenAI(
+    api_key="anything",
+    base_url="http://0.0.0.0:4000"
+)
+
+# request sent to model set on litellm proxy, `litellm --model`
+response = client.chat.completions.create(
+    model="gpt-4o, groq-llama", # 👈 Comma-separated models
+    messages = [
+        {
+            "role": "user",
+            "content": "this is a test request, write a short poem"
+        }
+    ],
+    extra_body={"fastest_response": True} # 👈 FLAG
+)
+
+print(response)
+```
+
+</TabItem>
+</Tabs>
+
+---
+
+### Example Setup:
+
+```yaml
+model_list:
+- model_name: groq-llama
+  litellm_params:
+    model: groq/llama3-8b-8192
+    api_key: os.environ/GROQ_API_KEY
+- model_name: gpt-4o
+  litellm_params:
+    model: gpt-4o
+    api_key: os.environ/OPENAI_API_KEY
+```
+
+```bash
+litellm --config /path/to/config.yaml
+
+# RUNNING on http://0.0.0.0:4000
+```
+
+</TabItem>
+</Tabs>
+
 ### Output
-Returns the first response
+Returns the first response in OpenAI format. Cancels other LLM API calls.
 ```json
 {
   "object": "chat.completion",
@@ -95,6 +186,7 @@ Returns the first response
 }
 ```

+
 ## Send 1 completion call to many models: Return All Responses
 This makes parallel calls to the specified models and returns all responses

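Reviewer note: for reference, the SDK-side equivalent of the proxy `fastest_response` flag documented above is `batch_completion_models`. A minimal sketch (model names and API keys are placeholders, not part of the diff):

```python
import os
import litellm

os.environ["OPENAI_API_KEY"] = "sk-..."  # placeholder
os.environ["GROQ_API_KEY"] = "gsk-..."   # placeholder

# Sends the same request to every listed model in parallel and
# returns the first response that comes back.
response = litellm.batch_completion_models(
    models=["gpt-4o", "groq/llama3-8b-8192"],
    messages=[{"role": "user", "content": "What's the weather like in Boston today?"}],
)
print(response)
```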
@@ -178,23 +178,26 @@ curl -X GET --location 'http://0.0.0.0:4000/health/services?service=webhook' \
 }
 ```

-**API Spec for Webhook Event**
+## **API Spec for Webhook Event**

 - `spend` *float*: The current spend amount for the 'event_group'.
-- `max_budget` *float*: The maximum allowed budget for the 'event_group'.
+- `max_budget` *float or null*: The maximum allowed budget for the 'event_group'. null if not set.
 - `token` *str*: A hashed value of the key, used for authentication or identification purposes.
-- `user_id` *str or null*: The ID of the user associated with the event (optional).
+- `customer_id` *str or null*: The ID of the customer associated with the event (optional).
+- `internal_user_id` *str or null*: The ID of the internal user associated with the event (optional).
 - `team_id` *str or null*: The ID of the team associated with the event (optional).
-- `user_email` *str or null*: The email of the user associated with the event (optional).
+- `user_email` *str or null*: The email of the internal user associated with the event (optional).
 - `key_alias` *str or null*: An alias for the key associated with the event (optional).
 - `projected_exceeded_date` *str or null*: The date when the budget is projected to be exceeded, returned when 'soft_budget' is set for key (optional).
 - `projected_spend` *float or null*: The projected spend amount, returned when 'soft_budget' is set for key (optional).
 - `event` *Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"]*: The type of event that triggered the webhook. Possible values are:
+    * "spend_tracked": Emitted whenever spend is tracked for a customer id.
     * "budget_crossed": Indicates that the spend has exceeded the max budget.
     * "threshold_crossed": Indicates that spend has crossed a threshold (currently sent when 85% and 95% of budget is reached).
     * "projected_limit_exceeded": For "key" only - Indicates that the projected spend is expected to exceed the soft budget threshold.
-- `event_group` *Literal["user", "key", "team", "proxy"]*: The group associated with the event. Possible values are:
-    * "user": The event is related to a specific user.
+- `event_group` *Literal["customer", "internal_user", "key", "team", "proxy"]*: The group associated with the event. Possible values are:
+    * "customer": The event is related to a specific customer.
+    * "internal_user": The event is related to a specific internal user.
     * "key": The event is related to a specific key.
     * "team": The event is related to a team.
     * "proxy": The event is related to a proxy.
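Reviewer note: to make the field list above concrete, here is a minimal sketch of a webhook receiver that consumes this event payload. FastAPI and the route path are assumptions for illustration, not part of the spec:

```python
from fastapi import FastAPI, Request

app = FastAPI()

@app.post("/litellm-webhook")  # assumed path; point WEBHOOK_URL at it
async def handle_litellm_event(request: Request):
    event = await request.json()
    # Fields documented in the API spec above
    spend = event["spend"]                # float
    max_budget = event.get("max_budget")  # float or null
    group = event["event_group"]          # "customer", "internal_user", "key", "team", "proxy"
    if event["event"] == "budget_crossed":
        print(f"{group} exceeded budget {max_budget}")
    elif event["event"] == "spend_tracked":
        print(f"spend for customer {event.get('customer_id')}: {spend}")
    return {"status": "ok"}
```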
251  docs/my-website/docs/proxy/customers.md  (new file)
@@ -0,0 +1,251 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# 🙋‍♂️ Customers

Track spend, set budgets for your customers.

## Tracking Customer Credit

### 1. Make LLM API call w/ Customer ID

Make a /chat/completions call, pass 'user' - First call Works

```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
        --header 'Content-Type: application/json' \
        --header 'Authorization: Bearer sk-1234' \ # 👈 YOUR PROXY KEY
        --data ' {
        "model": "azure-gpt-3.5",
        "user": "ishaan3", # 👈 CUSTOMER ID
        "messages": [
            {
            "role": "user",
            "content": "what time is it"
            }
        ]
        }'
```

The customer_id will be upserted into the DB with the new spend.

If the customer_id already exists, spend will be incremented.

### 2. Get Customer Spend

<Tabs>
<TabItem value="all-up" label="All-up spend">

Call `/customer/info` to get a customer's all up spend

```bash
curl -X GET 'http://0.0.0.0:4000/customer/info?end_user_id=ishaan3' \ # 👈 CUSTOMER ID
        -H 'Authorization: Bearer sk-1234' # 👈 YOUR PROXY KEY
```

Expected Response:

```
{
    "user_id": "ishaan3",
    "blocked": false,
    "alias": null,
    "spend": 0.001413,
    "allowed_model_region": null,
    "default_model": null,
    "litellm_budget_table": null
}
```

</TabItem>
<TabItem value="event-webhook" label="Event Webhook">

To update spend in your client-side DB, point the proxy to your webhook.

E.g. if your server is `https://webhook.site` and you're listening on `6ab090e8-c55f-4a23-b075-3209f5c57906`

1. Add webhook url to your proxy environment:

```bash
export WEBHOOK_URL="https://webhook.site/6ab090e8-c55f-4a23-b075-3209f5c57906"
```

2. Add 'webhook' to config.yaml

```yaml
general_settings:
  alerting: ["webhook"] # 👈 KEY CHANGE
```

3. Test it!

```bash
curl -X POST 'http://localhost:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
    "model": "mistral",
    "messages": [
        {
        "role": "user",
        "content": "What's the weather like in Boston today?"
        }
    ],
    "user": "krrish12"
}
'
```

Expected Response

```json
{
  "spend": 0.0011120000000000001, # 👈 SPEND
  "max_budget": null,
  "token": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
  "customer_id": "krrish12", # 👈 CUSTOMER ID
  "user_id": null,
  "team_id": null,
  "user_email": null,
  "key_alias": null,
  "projected_exceeded_date": null,
  "projected_spend": null,
  "event": "spend_tracked",
  "event_group": "customer",
  "event_message": "Customer spend tracked. Customer=krrish12, spend=0.0011120000000000001"
}
```

[See Webhook Spec](./alerting.md#api-spec-for-webhook-event)

</TabItem>
</Tabs>


## Setting Customer Budgets

Set customer budgets (e.g. monthly budgets, tpm/rpm limits) on LiteLLM Proxy

### Quick Start

Create / Update a customer with budget

**Create New Customer w/ budget**

```bash
curl -X POST 'http://0.0.0.0:4000/customer/new' \
        -H 'Authorization: Bearer sk-1234' \
        -H 'Content-Type: application/json' \
        -d '{
            "user_id" : "my-customer-id",
            "max_budget": "0" # 👈 CAN BE FLOAT
        }'
```

**Test it!**

```bash
curl -X POST 'http://localhost:4000/chat/completions' \
        -H 'Content-Type: application/json' \
        -H 'Authorization: Bearer sk-1234' \
        -d '{
            "model": "mistral",
            "messages": [
                {
                    "role": "user",
                    "content": "What'\''s the weather like in Boston today?"
                }
            ],
            "user": "ishaan-jaff-48"
}'
```

### Assign Pricing Tiers

Create and assign customers to pricing tiers.

#### 1. Create a budget

<Tabs>
<TabItem value="ui" label="UI">

- Go to the 'Budgets' tab on the UI.
- Click on '+ Create Budget'.
- Create your pricing tier (e.g. 'my-free-tier' with budget $4). This means each user on this pricing tier will have a max budget of $4.

<Image img={require('../../img/create_budget_modal.png')} />

</TabItem>
<TabItem value="api" label="API">

Use the `/budget/new` endpoint for creating a new budget. [API Reference](https://litellm-api.up.railway.app/#/budget%20management/new_budget_budget_new_post)

```bash
curl -X POST 'http://localhost:4000/budget/new' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
    "budget_id": "my-free-tier",
    "max_budget": 4
}'
```

</TabItem>
</Tabs>


#### 2. Assign Budget to Customer

In your application code, assign budget when creating a new customer.

Just use the `budget_id` used when creating the budget. In our example, this is `my-free-tier`.

```bash
curl -X POST 'http://localhost:4000/customer/new' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
    "user_id": "my-customer-id",
    "budget_id": "my-free-tier" # 👈 KEY CHANGE
}'
```

#### 3. Test it!

<Tabs>
<TabItem value="curl" label="curl">

```bash
curl -X POST 'http://localhost:4000/customer/new' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
    "user_id": "my-customer-id",
    "budget_id": "my-free-tier" # 👈 KEY CHANGE
}'
```

</TabItem>
<TabItem value="openai" label="OpenAI">

```python
from openai import OpenAI
client = OpenAI(
    base_url="<your_proxy_base_url>",
    api_key="<your_proxy_key>"
)

completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"}
    ],
    user="my-customer-id"
)

print(completion.choices[0].message)
```

</TabItem>
</Tabs>
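Reviewer note: as a companion to the curl commands in this new page, a minimal Python sketch of the same create-budget / create-customer / check-spend flow against the endpoints documented above (proxy URL, key, and budget values are placeholders):

```python
import requests

PROXY = "http://localhost:4000"
HEADERS = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}

# 1. Create a pricing tier (budget)
requests.post(f"{PROXY}/budget/new", headers=HEADERS,
              json={"budget_id": "my-free-tier", "max_budget": 4}).raise_for_status()

# 2. Create a customer attached to that budget
requests.post(f"{PROXY}/customer/new", headers=HEADERS,
              json={"user_id": "my-customer-id", "budget_id": "my-free-tier"}).raise_for_status()

# 3. Spend is tracked whenever a /chat/completions call passes "user": "my-customer-id";
#    read it back with /customer/info
info = requests.get(f"{PROXY}/customer/info", headers=HEADERS,
                    params={"end_user_id": "my-customer-id"}).json()
print(info["spend"])
```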
@@ -223,7 +223,7 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \

 Error
 ```shell
-{"error":{"message":"Authentication Error, ExceededBudget: User ishaan3 has exceeded their budget. Current spend: 0.0008869999999999999; Max Budget: 0.0001","type":"auth_error","param":"None","code":401}}%
+{"error":{"message":"Budget has been exceeded: User ishaan3 has exceeded their budget. Current spend: 0.0008869999999999999; Max Budget: 0.0001","type":"auth_error","param":"None","code":401}}%
 ```

 </TabItem>
BIN  docs/my-website/img/create_budget_modal.png  (new file)
Binary file not shown. After Width: | Height: | Size: 193 KiB
|
@ -41,6 +41,7 @@ const sidebars = {
|
||||||
"proxy/reliability",
|
"proxy/reliability",
|
||||||
"proxy/cost_tracking",
|
"proxy/cost_tracking",
|
||||||
"proxy/users",
|
"proxy/users",
|
||||||
|
"proxy/customers",
|
||||||
"proxy/billing",
|
"proxy/billing",
|
||||||
"proxy/user_keys",
|
"proxy/user_keys",
|
||||||
"proxy/enterprise",
|
"proxy/enterprise",
|
||||||
|
|
|
@@ -6,7 +6,13 @@ warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*
 import threading, requests, os
 from typing import Callable, List, Optional, Dict, Union, Any, Literal
 from litellm.caching import Cache
-from litellm._logging import set_verbose, _turn_on_debug, verbose_logger, json_logs
+from litellm._logging import (
+    set_verbose,
+    _turn_on_debug,
+    verbose_logger,
+    json_logs,
+    _turn_on_json,
+)
 from litellm.proxy._types import (
     KeyManagementSystem,
     KeyManagementSettings,
@@ -221,7 +227,7 @@ default_team_settings: Optional[List] = None
 max_user_budget: Optional[float] = None
 max_end_user_budget: Optional[float] = None
 #### RELIABILITY ####
-request_timeout: Optional[float] = 6000
+request_timeout: float = 6000
 num_retries: Optional[int] = None  # per model endpoint
 default_fallbacks: Optional[List] = None
 fallbacks: Optional[List] = None
@@ -298,6 +304,7 @@ api_base = None
 headers = None
 api_version = None
 organization = None
+project = None
 config_path = None
 ####### COMPLETION MODELS ###################
 open_ai_chat_completion_models: List = []
@@ -797,3 +804,4 @@ from .budget_manager import BudgetManager
 from .proxy.proxy_cli import run_server
 from .router import Router
 from .assistants.main import *
+from .batches.main import *
@@ -39,6 +39,16 @@ verbose_proxy_logger.addHandler(handler)
 verbose_logger.addHandler(handler)


+def _turn_on_json():
+    handler = logging.StreamHandler()
+    handler.setLevel(logging.DEBUG)
+    handler.setFormatter(JsonFormatter())
+
+    verbose_router_logger.addHandler(handler)
+    verbose_proxy_logger.addHandler(handler)
+    verbose_logger.addHandler(handler)
+
+
 def _turn_on_debug():
     verbose_logger.setLevel(level=logging.DEBUG)  # set package log to debug
     verbose_router_logger.setLevel(level=logging.DEBUG)  # set router logs to debug
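Reviewer note: a short, illustrative sketch of what the new `_turn_on_json()` helper does when called from application code. How the proxy actually wires this up (e.g. via the `json_logs` setting imported in `litellm/__init__.py` above) may differ; calling the private helper directly is shown only for clarity:

```python
from litellm._logging import _turn_on_json, verbose_logger

# Attaches a StreamHandler with the JsonFormatter to the package,
# router, and proxy loggers, so subsequent log lines are JSON.
_turn_on_json()
verbose_logger.warning("this line is now emitted through the JSON handler")
```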
589  litellm/batches/main.py  (new file)
@@ -0,0 +1,589 @@
"""
Main File for Batches API implementation

https://platform.openai.com/docs/api-reference/batch

- create_batch()
- retrieve_batch()
- cancel_batch()
- list_batch()

"""

import os
import asyncio
from functools import partial
import contextvars
from typing import Literal, Optional, Dict, Coroutine, Any, Union
import httpx

import litellm
from litellm import client
from litellm.utils import supports_httpx_timeout
from ..types.router import *
from ..llms.openai import OpenAIBatchesAPI, OpenAIFilesAPI
from ..types.llms.openai import (
    CreateBatchRequest,
    RetrieveBatchRequest,
    CancelBatchRequest,
    CreateFileRequest,
    FileTypes,
    FileObject,
    Batch,
    FileContentRequest,
    HttpxBinaryResponseContent,
)

####### ENVIRONMENT VARIABLES ###################
openai_batches_instance = OpenAIBatchesAPI()
openai_files_instance = OpenAIFilesAPI()
#################################################


async def acreate_file(
    file: FileTypes,
    purpose: Literal["assistants", "batch", "fine-tune"],
    custom_llm_provider: Literal["openai"] = "openai",
    extra_headers: Optional[Dict[str, str]] = None,
    extra_body: Optional[Dict[str, str]] = None,
    **kwargs,
) -> Coroutine[Any, Any, FileObject]:
    """
    Async: Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.

    LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files
    """
    try:
        loop = asyncio.get_event_loop()
        kwargs["acreate_file"] = True

        # Use a partial function to pass your keyword arguments
        func = partial(
            create_file,
            file,
            purpose,
            custom_llm_provider,
            extra_headers,
            extra_body,
            **kwargs,
        )

        # Add the context to the function
        ctx = contextvars.copy_context()
        func_with_context = partial(ctx.run, func)
        init_response = await loop.run_in_executor(None, func_with_context)
        if asyncio.iscoroutine(init_response):
            response = await init_response
        else:
            response = init_response  # type: ignore

        return response
    except Exception as e:
        raise e


def create_file(
    file: FileTypes,
    purpose: Literal["assistants", "batch", "fine-tune"],
    custom_llm_provider: Literal["openai"] = "openai",
    extra_headers: Optional[Dict[str, str]] = None,
    extra_body: Optional[Dict[str, str]] = None,
    **kwargs,
) -> Union[FileObject, Coroutine[Any, Any, FileObject]]:
    """
    Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.

    LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files
    """
    try:
        optional_params = GenericLiteLLMParams(**kwargs)
        if custom_llm_provider == "openai":
            # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
            api_base = (
                optional_params.api_base
                or litellm.api_base
                or os.getenv("OPENAI_API_BASE")
                or "https://api.openai.com/v1"
            )
            organization = (
                optional_params.organization
                or litellm.organization
                or os.getenv("OPENAI_ORGANIZATION", None)
                or None  # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
            )
            # set API KEY
            api_key = (
                optional_params.api_key
                or litellm.api_key  # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
                or litellm.openai_key
                or os.getenv("OPENAI_API_KEY")
            )
            ### TIMEOUT LOGIC ###
            timeout = (
                optional_params.timeout or kwargs.get("request_timeout", 600) or 600
            )
            # set timeout for 10 minutes by default

            if (
                timeout is not None
                and isinstance(timeout, httpx.Timeout)
                and supports_httpx_timeout(custom_llm_provider) == False
            ):
                read_timeout = timeout.read or 600
                timeout = read_timeout  # default 10 min timeout
            elif timeout is not None and not isinstance(timeout, httpx.Timeout):
                timeout = float(timeout)  # type: ignore
            elif timeout is None:
                timeout = 600.0

            _create_file_request = CreateFileRequest(
                file=file,
                purpose=purpose,
                extra_headers=extra_headers,
                extra_body=extra_body,
            )

            _is_async = kwargs.pop("acreate_file", False) is True

            response = openai_files_instance.create_file(
                _is_async=_is_async,
                api_base=api_base,
                api_key=api_key,
                timeout=timeout,
                max_retries=optional_params.max_retries,
                organization=organization,
                create_file_data=_create_file_request,
            )
        else:
            raise litellm.exceptions.BadRequestError(
                message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format(
                    custom_llm_provider
                ),
                model="n/a",
                llm_provider=custom_llm_provider,
                response=httpx.Response(
                    status_code=400,
                    content="Unsupported provider",
                    request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"),  # type: ignore
                ),
            )
        return response
    except Exception as e:
        raise e


async def afile_content(
    file_id: str,
    custom_llm_provider: Literal["openai"] = "openai",
    extra_headers: Optional[Dict[str, str]] = None,
    extra_body: Optional[Dict[str, str]] = None,
    **kwargs,
) -> Coroutine[Any, Any, HttpxBinaryResponseContent]:
    """
    Async: Get file contents

    LiteLLM Equivalent of GET https://api.openai.com/v1/files
    """
    try:
        loop = asyncio.get_event_loop()
        kwargs["afile_content"] = True

        # Use a partial function to pass your keyword arguments
        func = partial(
            file_content,
            file_id,
            custom_llm_provider,
            extra_headers,
            extra_body,
            **kwargs,
        )

        # Add the context to the function
        ctx = contextvars.copy_context()
        func_with_context = partial(ctx.run, func)
        init_response = await loop.run_in_executor(None, func_with_context)
        if asyncio.iscoroutine(init_response):
            response = await init_response
        else:
            response = init_response  # type: ignore

        return response
    except Exception as e:
        raise e


def file_content(
    file_id: str,
    custom_llm_provider: Literal["openai"] = "openai",
    extra_headers: Optional[Dict[str, str]] = None,
    extra_body: Optional[Dict[str, str]] = None,
    **kwargs,
) -> Union[HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]]:
    """
    Returns the contents of the specified file.

    LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files
    """
    try:
        optional_params = GenericLiteLLMParams(**kwargs)
        if custom_llm_provider == "openai":
            # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
            api_base = (
                optional_params.api_base
                or litellm.api_base
                or os.getenv("OPENAI_API_BASE")
                or "https://api.openai.com/v1"
            )
            organization = (
                optional_params.organization
                or litellm.organization
                or os.getenv("OPENAI_ORGANIZATION", None)
                or None  # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
            )
            # set API KEY
            api_key = (
                optional_params.api_key
                or litellm.api_key  # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
                or litellm.openai_key
                or os.getenv("OPENAI_API_KEY")
            )
            ### TIMEOUT LOGIC ###
            timeout = (
                optional_params.timeout or kwargs.get("request_timeout", 600) or 600
            )
            # set timeout for 10 minutes by default

            if (
                timeout is not None
                and isinstance(timeout, httpx.Timeout)
                and supports_httpx_timeout(custom_llm_provider) == False
            ):
                read_timeout = timeout.read or 600
                timeout = read_timeout  # default 10 min timeout
            elif timeout is not None and not isinstance(timeout, httpx.Timeout):
                timeout = float(timeout)  # type: ignore
            elif timeout is None:
                timeout = 600.0

            _file_content_request = FileContentRequest(
                file_id=file_id,
                extra_headers=extra_headers,
                extra_body=extra_body,
            )

            _is_async = kwargs.pop("afile_content", False) is True

            response = openai_files_instance.file_content(
                _is_async=_is_async,
                file_content_request=_file_content_request,
                api_base=api_base,
                api_key=api_key,
                timeout=timeout,
                max_retries=optional_params.max_retries,
                organization=organization,
            )
        else:
            raise litellm.exceptions.BadRequestError(
                message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format(
                    custom_llm_provider
                ),
                model="n/a",
                llm_provider=custom_llm_provider,
                response=httpx.Response(
                    status_code=400,
                    content="Unsupported provider",
                    request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"),  # type: ignore
                ),
            )
        return response
    except Exception as e:
        raise e


async def acreate_batch(
    completion_window: Literal["24h"],
    endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"],
    input_file_id: str,
    custom_llm_provider: Literal["openai"] = "openai",
    metadata: Optional[Dict[str, str]] = None,
    extra_headers: Optional[Dict[str, str]] = None,
    extra_body: Optional[Dict[str, str]] = None,
    **kwargs,
) -> Coroutine[Any, Any, Batch]:
    """
    Async: Creates and executes a batch from an uploaded file of request

    LiteLLM Equivalent of POST: https://api.openai.com/v1/batches
    """
    try:
        loop = asyncio.get_event_loop()
        kwargs["acreate_batch"] = True

        # Use a partial function to pass your keyword arguments
        func = partial(
            create_batch,
            completion_window,
            endpoint,
            input_file_id,
            custom_llm_provider,
            metadata,
            extra_headers,
            extra_body,
            **kwargs,
        )

        # Add the context to the function
        ctx = contextvars.copy_context()
        func_with_context = partial(ctx.run, func)
        init_response = await loop.run_in_executor(None, func_with_context)
        if asyncio.iscoroutine(init_response):
            response = await init_response
        else:
            response = init_response  # type: ignore

        return response
    except Exception as e:
        raise e


def create_batch(
    completion_window: Literal["24h"],
    endpoint: Literal["/v1/chat/completions", "/v1/embeddings"],
    input_file_id: str,
    custom_llm_provider: Literal["openai"] = "openai",
    metadata: Optional[Dict[str, str]] = None,
    extra_headers: Optional[Dict[str, str]] = None,
    extra_body: Optional[Dict[str, str]] = None,
    **kwargs,
) -> Union[Batch, Coroutine[Any, Any, Batch]]:
    """
    Creates and executes a batch from an uploaded file of request

    LiteLLM Equivalent of POST: https://api.openai.com/v1/batches
    """
    try:
        optional_params = GenericLiteLLMParams(**kwargs)
        if custom_llm_provider == "openai":

            # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
            api_base = (
                optional_params.api_base
                or litellm.api_base
                or os.getenv("OPENAI_API_BASE")
                or "https://api.openai.com/v1"
            )
            organization = (
                optional_params.organization
                or litellm.organization
                or os.getenv("OPENAI_ORGANIZATION", None)
                or None  # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
            )
            # set API KEY
            api_key = (
                optional_params.api_key
                or litellm.api_key  # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
                or litellm.openai_key
                or os.getenv("OPENAI_API_KEY")
            )
            ### TIMEOUT LOGIC ###
            timeout = (
                optional_params.timeout or kwargs.get("request_timeout", 600) or 600
            )
            # set timeout for 10 minutes by default

            if (
                timeout is not None
                and isinstance(timeout, httpx.Timeout)
                and supports_httpx_timeout(custom_llm_provider) == False
            ):
                read_timeout = timeout.read or 600
                timeout = read_timeout  # default 10 min timeout
            elif timeout is not None and not isinstance(timeout, httpx.Timeout):
                timeout = float(timeout)  # type: ignore
            elif timeout is None:
                timeout = 600.0

            _is_async = kwargs.pop("acreate_batch", False) is True

            _create_batch_request = CreateBatchRequest(
                completion_window=completion_window,
                endpoint=endpoint,
                input_file_id=input_file_id,
                metadata=metadata,
                extra_headers=extra_headers,
                extra_body=extra_body,
            )

            response = openai_batches_instance.create_batch(
                api_base=api_base,
                api_key=api_key,
                organization=organization,
                create_batch_data=_create_batch_request,
                timeout=timeout,
                max_retries=optional_params.max_retries,
                _is_async=_is_async,
            )
        else:
            raise litellm.exceptions.BadRequestError(
                message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format(
                    custom_llm_provider
                ),
                model="n/a",
                llm_provider=custom_llm_provider,
                response=httpx.Response(
                    status_code=400,
                    content="Unsupported provider",
                    request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"),  # type: ignore
                ),
            )
        return response
    except Exception as e:
        raise e


async def aretrieve_batch(
    batch_id: str,
    custom_llm_provider: Literal["openai"] = "openai",
    metadata: Optional[Dict[str, str]] = None,
    extra_headers: Optional[Dict[str, str]] = None,
    extra_body: Optional[Dict[str, str]] = None,
    **kwargs,
) -> Coroutine[Any, Any, Batch]:
    """
    Async: Retrieves a batch.

    LiteLLM Equivalent of GET https://api.openai.com/v1/batches/{batch_id}
    """
    try:
        loop = asyncio.get_event_loop()
        kwargs["aretrieve_batch"] = True

        # Use a partial function to pass your keyword arguments
        func = partial(
            retrieve_batch,
            batch_id,
            custom_llm_provider,
            metadata,
            extra_headers,
            extra_body,
            **kwargs,
        )

        # Add the context to the function
        ctx = contextvars.copy_context()
        func_with_context = partial(ctx.run, func)
        init_response = await loop.run_in_executor(None, func_with_context)
        if asyncio.iscoroutine(init_response):
            response = await init_response
        else:
            response = init_response  # type: ignore

        return response
    except Exception as e:
        raise e


def retrieve_batch(
    batch_id: str,
    custom_llm_provider: Literal["openai"] = "openai",
    metadata: Optional[Dict[str, str]] = None,
    extra_headers: Optional[Dict[str, str]] = None,
    extra_body: Optional[Dict[str, str]] = None,
    **kwargs,
) -> Union[Batch, Coroutine[Any, Any, Batch]]:
    """
    Retrieves a batch.

    LiteLLM Equivalent of GET https://api.openai.com/v1/batches/{batch_id}
    """
    try:
        optional_params = GenericLiteLLMParams(**kwargs)
        if custom_llm_provider == "openai":

            # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
            api_base = (
                optional_params.api_base
                or litellm.api_base
                or os.getenv("OPENAI_API_BASE")
                or "https://api.openai.com/v1"
            )
            organization = (
                optional_params.organization
                or litellm.organization
                or os.getenv("OPENAI_ORGANIZATION", None)
                or None  # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
            )
            # set API KEY
            api_key = (
                optional_params.api_key
                or litellm.api_key  # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
                or litellm.openai_key
                or os.getenv("OPENAI_API_KEY")
            )
            ### TIMEOUT LOGIC ###
            timeout = (
                optional_params.timeout or kwargs.get("request_timeout", 600) or 600
            )
            # set timeout for 10 minutes by default

            if (
                timeout is not None
                and isinstance(timeout, httpx.Timeout)
                and supports_httpx_timeout(custom_llm_provider) == False
            ):
                read_timeout = timeout.read or 600
                timeout = read_timeout  # default 10 min timeout
            elif timeout is not None and not isinstance(timeout, httpx.Timeout):
                timeout = float(timeout)  # type: ignore
            elif timeout is None:
                timeout = 600.0

            _retrieve_batch_request = RetrieveBatchRequest(
                batch_id=batch_id,
                extra_headers=extra_headers,
                extra_body=extra_body,
            )

            _is_async = kwargs.pop("aretrieve_batch", False) is True

            response = openai_batches_instance.retrieve_batch(
                _is_async=_is_async,
                retrieve_batch_data=_retrieve_batch_request,
                api_base=api_base,
                api_key=api_key,
                organization=organization,
                timeout=timeout,
                max_retries=optional_params.max_retries,
            )
        else:
            raise litellm.exceptions.BadRequestError(
                message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format(
                    custom_llm_provider
                ),
                model="n/a",
                llm_provider=custom_llm_provider,
                response=httpx.Response(
                    status_code=400,
                    content="Unsupported provider",
                    request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"),  # type: ignore
                ),
            )
        return response
    except Exception as e:
        raise e


def cancel_batch():
    pass


def list_batch():
    pass


async def acancel_batch():
    pass


async def alist_batch():
    pass
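Reviewer note: a minimal end-to-end sketch of the Batches API surface added in this file (the JSONL path and IDs are placeholders; only the `openai` provider is supported, as the code above enforces):

```python
import asyncio
import litellm

async def main():
    # 1. Upload the batch input file (purpose="batch")
    file_obj = await litellm.acreate_file(
        file=open("batch_requests.jsonl", "rb"),  # placeholder path
        purpose="batch",
        custom_llm_provider="openai",
    )

    # 2. Create the batch against the uploaded file
    batch = await litellm.acreate_batch(
        completion_window="24h",
        endpoint="/v1/chat/completions",
        input_file_id=file_obj.id,
        custom_llm_provider="openai",
    )

    # 3. Check the batch until OpenAI finishes processing it
    status = await litellm.aretrieve_batch(batch_id=batch.id, custom_llm_provider="openai")
    print(status.status)

asyncio.run(main())
```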
@@ -314,6 +314,7 @@ class BudgetExceededError(Exception):
         self.current_cost = current_cost
         self.max_budget = max_budget
         message = f"Budget has been exceeded! Current cost: {current_cost}, Max budget: {max_budget}"
+        self.message = message
         super().__init__(message)

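Reviewer note: the added `self.message` attribute makes this error easier to surface programmatically. A minimal sketch of catching it with litellm's global budget setting (the budget value is a placeholder chosen to trip the error immediately):

```python
import litellm
from litellm import BudgetExceededError

litellm.max_budget = 0.0000001  # placeholder; forces the error on the next call

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
    )
except BudgetExceededError as e:
    # .message is now stored on the exception by this change
    print(e.message, e.current_cost, e.max_budget)
```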
@@ -455,8 +455,13 @@ class LangFuseLogger:
             }
             generation_name = clean_metadata.pop("generation_name", None)
             if generation_name is None:
-                # just log `litellm-{call_type}` as the generation name
+                # if `generation_name` is None, use sensible default values
+                # If using litellm proxy user `key_alias` if not None
+                # If `key_alias` is None, just log `litellm-{call_type}` as the generation name
+                _user_api_key_alias = clean_metadata.get("user_api_key_alias", None)
                 generation_name = f"litellm-{kwargs.get('call_type', 'completion')}"
+                if _user_api_key_alias is not None:
+                    generation_name = f"litellm:{_user_api_key_alias}"

             if response_obj is not None and "system_fingerprint" in response_obj:
                 system_fingerprint = response_obj.get("system_fingerprint", None)
@@ -41,6 +41,7 @@ class ProviderRegionOutageModel(BaseOutageModel):

 # we use this for the email header, please send a test email if you change this. verify it looks good on email
 LITELLM_LOGO_URL = "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
+LITELLM_SUPPORT_CONTACT = "support@berri.ai"


 class LiteLLMBase(BaseModel):
@@ -683,14 +684,16 @@ class SlackAlerting(CustomLogger):
         event: Optional[
             Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"]
         ] = None
-        event_group: Optional[Literal["user", "team", "key", "proxy"]] = None
+        event_group: Optional[
+            Literal["internal_user", "team", "key", "proxy", "customer"]
+        ] = None
         event_message: str = ""
         webhook_event: Optional[WebhookEvent] = None
         if type == "proxy_budget":
             event_group = "proxy"
             event_message += "Proxy Budget: "
         elif type == "user_budget":
-            event_group = "user"
+            event_group = "internal_user"
             event_message += "User Budget: "
             _id = user_info.user_id or _id
         elif type == "team_budget":
@@ -754,6 +757,36 @@ class SlackAlerting(CustomLogger):
                 return
         return

+    async def customer_spend_alert(
+        self,
+        token: Optional[str],
+        key_alias: Optional[str],
+        end_user_id: Optional[str],
+        response_cost: Optional[float],
+        max_budget: Optional[float],
+    ):
+        if end_user_id is not None and token is not None and response_cost is not None:
+            # log customer spend
+            event = WebhookEvent(
+                spend=response_cost,
+                max_budget=max_budget,
+                token=token,
+                customer_id=end_user_id,
+                user_id=None,
+                team_id=None,
+                user_email=None,
+                key_alias=key_alias,
+                projected_exceeded_date=None,
+                projected_spend=None,
+                event="spend_tracked",
+                event_group="customer",
+                event_message="Customer spend tracked. Customer={}, spend={}".format(
+                    end_user_id, response_cost
+                ),
+            )
+
+            await self.send_webhook_alert(webhook_event=event)
+
     def _count_outage_alerts(self, alerts: List[int]) -> str:
         """
         Parameters:
@@ -1171,6 +1204,10 @@ Model Info:
             await self._check_if_using_premium_email_feature(
                 premium_user, email_logo_url, email_support_contact
             )
+            if email_logo_url is None:
+                email_logo_url = LITELLM_LOGO_URL
+            if email_support_contact is None:
+                email_support_contact = LITELLM_SUPPORT_CONTACT

             event_name = webhook_event.event_message
             recipient_email = webhook_event.user_email
@@ -1271,6 +1308,11 @@ Model Info:
                 premium_user, email_logo_url, email_support_contact
             )

+            if email_logo_url is None:
+                email_logo_url = LITELLM_LOGO_URL
+            if email_support_contact is None:
+                email_support_contact = LITELLM_SUPPORT_CONTACT
+
             event_name = webhook_event.event_message
             recipient_email = webhook_event.user_email
             user_name = webhook_event.user_id
@@ -1401,7 +1443,9 @@ Model Info:
             if response.status_code == 200:
                 pass
             else:
-                print("Error sending slack alert. Error=", response.text)  # noqa
+                verbose_proxy_logger.debug(
+                    "Error sending slack alert. Error=", response.text
+                )

     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
         """Log deployment latency"""
@@ -1421,6 +1465,8 @@ Model Info:
                 final_value = float(
                     response_s.total_seconds() / completion_tokens
                 )
+                if isinstance(final_value, timedelta):
+                    final_value = final_value.total_seconds()

                 await self.async_update_daily_reports(
                     DeploymentMetrics(
@@ -1,114 +1,153 @@
+import traceback
+from litellm._logging import verbose_logger
+import litellm
+
+
 class TraceloopLogger:
     def __init__(self):
-        from traceloop.sdk.tracing.tracing import TracerWrapper
-        from traceloop.sdk import Traceloop
-
-        Traceloop.init(app_name="Litellm-Server", disable_batch=True)
+        try:
+            from traceloop.sdk.tracing.tracing import TracerWrapper
+            from traceloop.sdk import Traceloop
+            from traceloop.sdk.instruments import Instruments
+        except ModuleNotFoundError as e:
+            verbose_logger.error(
+                f"Traceloop not installed, try running 'pip install traceloop-sdk' to fix this error: {e}\n{traceback.format_exc()}"
+            )
+
+        Traceloop.init(
+            app_name="Litellm-Server",
+            disable_batch=True,
+            instruments=[
+                Instruments.CHROMA,
+                Instruments.PINECONE,
+                Instruments.WEAVIATE,
+                Instruments.LLAMA_INDEX,
+                Instruments.LANGCHAIN,
+            ],
+        )
         self.tracer_wrapper = TracerWrapper()

-    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
-        from opentelemetry.trace import SpanKind
+    def log_event(
+        self,
+        kwargs,
+        response_obj,
+        start_time,
+        end_time,
+        user_id,
+        print_verbose,
+        level="DEFAULT",
+        status_message=None,
+    ):
+        from opentelemetry import trace
+        from opentelemetry.trace import SpanKind, Status, StatusCode
         from opentelemetry.semconv.ai import SpanAttributes

         try:
+            print_verbose(
+                f"Traceloop Logging - Enters logging function for model {kwargs}"
+            )
+
             tracer = self.tracer_wrapper.get_tracer()

-            model = kwargs.get("model")
-
-            # LiteLLM uses the standard OpenAI library, so it's already handled by Traceloop SDK
-            if kwargs.get("litellm_params").get("custom_llm_provider") == "openai":
-                return
-
             optional_params = kwargs.get("optional_params", {})
-            with tracer.start_as_current_span(
-                "litellm.completion",
-                kind=SpanKind.CLIENT,
-            ) as span:
-                if span.is_recording():
-                    span.set_attribute(
-                        SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model")
-                    )
-                    if "stop" in optional_params:
-                        span.set_attribute(
-                            SpanAttributes.LLM_CHAT_STOP_SEQUENCES,
-                            optional_params.get("stop"),
-                        )
-                    if "frequency_penalty" in optional_params:
-                        span.set_attribute(
-                            SpanAttributes.LLM_FREQUENCY_PENALTY,
-                            optional_params.get("frequency_penalty"),
-                        )
-                    if "presence_penalty" in optional_params:
-                        span.set_attribute(
-                            SpanAttributes.LLM_PRESENCE_PENALTY,
-                            optional_params.get("presence_penalty"),
-                        )
-                    if "top_p" in optional_params:
-                        span.set_attribute(
-                            SpanAttributes.LLM_TOP_P, optional_params.get("top_p")
-                        )
-                    if "tools" in optional_params or "functions" in optional_params:
-                        span.set_attribute(
-                            SpanAttributes.LLM_REQUEST_FUNCTIONS,
-                            optional_params.get(
-                                "tools", optional_params.get("functions")
-                            ),
-                        )
-                    if "user" in optional_params:
-                        span.set_attribute(
-                            SpanAttributes.LLM_USER, optional_params.get("user")
-                        )
-                    if "max_tokens" in optional_params:
-                        span.set_attribute(
-                            SpanAttributes.LLM_REQUEST_MAX_TOKENS,
-                            kwargs.get("max_tokens"),
-                        )
-                    if "temperature" in optional_params:
-                        span.set_attribute(
-                            SpanAttributes.LLM_TEMPERATURE, kwargs.get("temperature")
-                        )
-
-                    for idx, prompt in enumerate(kwargs.get("messages")):
-                        span.set_attribute(
-                            f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
-                            prompt.get("role"),
-                        )
-                        span.set_attribute(
-                            f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
-                            prompt.get("content"),
-                        )
-
-                    span.set_attribute(
-                        SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model")
-                    )
-                    usage = response_obj.get("usage")
-                    if usage:
-                        span.set_attribute(
-                            SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
-                            usage.get("total_tokens"),
-                        )
-                        span.set_attribute(
-                            SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
-                            usage.get("completion_tokens"),
-                        )
-                        span.set_attribute(
-                            SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
-                            usage.get("prompt_tokens"),
-                        )
-
-                    for idx, choice in enumerate(response_obj.get("choices")):
-                        span.set_attribute(
-                            f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
-                            choice.get("finish_reason"),
-                        )
-                        span.set_attribute(
-                            f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
-                            choice.get("message").get("role"),
-                        )
-                        span.set_attribute(
-                            f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
-                            choice.get("message").get("content"),
-                        )
+            span = tracer.start_span(
+                "litellm.completion", kind=SpanKind.CLIENT, start_time=start_time
+            )
+
+            if span.is_recording():
+                span.set_attribute(
+                    SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model")
+                )
+                if "stop" in optional_params:
+                    span.set_attribute(
+                        SpanAttributes.LLM_CHAT_STOP_SEQUENCES,
+                        optional_params.get("stop"),
+                    )
+                if "frequency_penalty" in optional_params:
+                    span.set_attribute(
+                        SpanAttributes.LLM_FREQUENCY_PENALTY,
+                        optional_params.get("frequency_penalty"),
+                    )
+                if "presence_penalty" in optional_params:
+                    span.set_attribute(
+                        SpanAttributes.LLM_PRESENCE_PENALTY,
+                        optional_params.get("presence_penalty"),
+                    )
+                if "top_p" in optional_params:
+                    span.set_attribute(
+                        SpanAttributes.LLM_TOP_P, optional_params.get("top_p")
+                    )
+                if "tools" in optional_params or "functions" in optional_params:
+                    span.set_attribute(
+                        SpanAttributes.LLM_REQUEST_FUNCTIONS,
+                        optional_params.get("tools", optional_params.get("functions")),
+                    )
+                if "user" in optional_params:
+                    span.set_attribute(
+                        SpanAttributes.LLM_USER, optional_params.get("user")
+                    )
+                if "max_tokens" in optional_params:
+                    span.set_attribute(
+                        SpanAttributes.LLM_REQUEST_MAX_TOKENS,
+                        kwargs.get("max_tokens"),
+                    )
+                if "temperature" in optional_params:
+                    span.set_attribute(
+                        SpanAttributes.LLM_REQUEST_TEMPERATURE,
+                        kwargs.get("temperature"),
+                    )
+
+                for idx, prompt in enumerate(kwargs.get("messages")):
+                    span.set_attribute(
+                        f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
+                        prompt.get("role"),
+                    )
+                    span.set_attribute(
+                        f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
+                        prompt.get("content"),
+                    )
+
+                span.set_attribute(
+                    SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model")
+                )
+                usage = response_obj.get("usage")
+                if usage:
+                    span.set_attribute(
+                        SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
+                        usage.get("total_tokens"),
+                    )
+                    span.set_attribute(
+                        SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
+                        usage.get("completion_tokens"),
+                    )
+                    span.set_attribute(
+                        SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
+                        usage.get("prompt_tokens"),
+                    )
+
+                for idx, choice in enumerate(response_obj.get("choices")):
+                    span.set_attribute(
+                        f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
+                        choice.get("finish_reason"),
+                    )
+                    span.set_attribute(
+                        f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
+                        choice.get("message").get("role"),
+                    )
+                    span.set_attribute(
+                        f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
+                        choice.get("message").get("content"),
+                    )
+
+            if (
+                level == "ERROR"
+                and status_message is not None
+                and isinstance(status_message, str)
+            ):
+                span.record_exception(Exception(status_message))
+                span.set_status(Status(StatusCode.ERROR, status_message))
||||||
|
span.end(end_time)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print_verbose(f"Traceloop Layer Error - {e}")
|
print_verbose(f"Traceloop Layer Error - {e}")
@ -379,13 +379,12 @@ class AnthropicChatCompletion(BaseLLM):
        logger_fn=None,
        headers={},
    ):
-       self.async_handler = AsyncHTTPHandler(
-           timeout=httpx.Timeout(timeout=600.0, connect=5.0)
-       )
+       async_handler = AsyncHTTPHandler(
+           timeout=httpx.Timeout(timeout=600.0, connect=20.0)
+       )
        data["stream"] = True
-       response = await self.async_handler.post(
-           api_base, headers=headers, data=json.dumps(data), stream=True
-       )
+       response = await async_handler.post(api_base, headers=headers, json=data)

        if response.status_code != 200:
            raise AnthropicError(
@ -421,12 +420,10 @@ class AnthropicChatCompletion(BaseLLM):
        logger_fn=None,
        headers={},
    ) -> Union[ModelResponse, CustomStreamWrapper]:
-       self.async_handler = AsyncHTTPHandler(
+       async_handler = AsyncHTTPHandler(
            timeout=httpx.Timeout(timeout=600.0, connect=5.0)
        )
-       response = await self.async_handler.post(
-           api_base, headers=headers, data=json.dumps(data)
-       )
+       response = await async_handler.post(api_base, headers=headers, json=data)
        if stream and _is_function_call:
            return self.process_streaming_response(
                model=model,
@ -43,12 +43,13 @@ class AsyncHTTPHandler:
        self,
        url: str,
        data: Optional[Union[dict, str]] = None,  # type: ignore
+       json: Optional[dict] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        stream: bool = False,
    ):
        req = self.client.build_request(
-           "POST", url, data=data, params=params, headers=headers  # type: ignore
+           "POST", url, data=data, json=json, params=params, headers=headers  # type: ignore
        )
        response = await self.client.send(req, stream=stream)
        return response
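The new `json=` parameter lets callers hand httpx a dict and skip the manual `json.dumps(...)` step, which is what the Anthropic hunks above now rely on. A minimal sketch of the call pattern; the import path and endpoint are assumptions, not shown in this diff:

```python
import asyncio
import httpx
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler  # assumed path

async def main():
    # Mirror the handler construction used in the Anthropic hunks above.
    handler = AsyncHTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
    # With json=, httpx serializes the body and sets Content-Type: application/json.
    resp = await handler.post(
        "https://api.anthropic.com/v1/messages",  # example endpoint
        headers={"x-api-key": "sk-ant-...", "anthropic-version": "2023-06-01"},
        json={"model": "claude-3-opus-20240229", "max_tokens": 16, "messages": []},
    )
    print(resp.status_code)

asyncio.run(main())
```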
@ -45,6 +45,8 @@ class OllamaConfig:

    - `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7

+   - `seed` (int): Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. Example usage: seed 42
+
    - `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"

    - `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1
@ -69,6 +71,7 @@ class OllamaConfig:
    repeat_last_n: Optional[int] = None
    repeat_penalty: Optional[float] = None
    temperature: Optional[float] = None
+   seed: Optional[int] = None
    stop: Optional[list] = (
        None  # stop is a list based on this - https://github.com/ollama/ollama/pull/442
    )
@ -90,6 +93,7 @@ class OllamaConfig:
        repeat_last_n: Optional[int] = None,
        repeat_penalty: Optional[float] = None,
        temperature: Optional[float] = None,
+       seed: Optional[int] = None,
        stop: Optional[list] = None,
        tfs_z: Optional[float] = None,
        num_predict: Optional[int] = None,
@ -120,6 +124,44 @@ class OllamaConfig:
            )
            and v is not None
        }

+   def get_supported_openai_params(
+       self,
+   ):
+       return [
+           "max_tokens",
+           "stream",
+           "top_p",
+           "temperature",
+           "seed",
+           "frequency_penalty",
+           "stop",
+           "response_format",
+       ]
+
+
+# ollama wants plain base64 jpeg/png files as images.  strip any leading dataURI
+# and convert to jpeg if necessary.
+def _convert_image(image):
+    import base64, io
+
+    try:
+        from PIL import Image
+    except:
+        raise Exception(
+            "ollama image conversion failed please run `pip install Pillow`"
+        )
+
+    orig = image
+    if image.startswith("data:"):
+        image = image.split(",")[-1]
+    try:
+        image_data = Image.open(io.BytesIO(base64.b64decode(image)))
+        if image_data.format in ["JPEG", "PNG"]:
+            return image
+    except:
+        return orig
+    jpeg_image = io.BytesIO()
+    image_data.convert("RGB").save(jpeg_image, "JPEG")
+    jpeg_image.seek(0)
+    return base64.b64encode(jpeg_image.getvalue()).decode("utf-8")
+
+
# ollama implementation
@ -158,7 +200,7 @@ def get_ollama_response(
    if format is not None:
        data["format"] = format
    if images is not None:
-       data["images"] = images
+       data["images"] = [_convert_image(image) for image in images]

    ## LOGGING
    logging_obj.pre_call(
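A quick sanity check of the image helper added above. This is illustrative only; it requires Pillow, and the import path for `_convert_image` is an assumption:

```python
import base64, io
from PIL import Image
from litellm.llms.ollama import _convert_image  # assumed import path

# Build a tiny in-memory PNG and wrap it in a data URI, the form clients often send.
buf = io.BytesIO()
Image.new("RGB", (1, 1), "red").save(buf, "PNG")
png_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
data_uri = "data:image/png;base64," + png_b64

# PNG is already a format Ollama accepts, so only the data-URI prefix is stripped.
assert _convert_image(data_uri) == png_b64
```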
@ -45,6 +45,8 @@ class OllamaChatConfig:

    - `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7

+   - `seed` (int): Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. Example usage: seed 42
+
    - `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"

    - `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1
@ -69,6 +71,7 @@ class OllamaChatConfig:
    repeat_last_n: Optional[int] = None
    repeat_penalty: Optional[float] = None
    temperature: Optional[float] = None
+   seed: Optional[int] = None
    stop: Optional[list] = (
        None  # stop is a list based on this - https://github.com/ollama/ollama/pull/442
    )
@ -90,6 +93,7 @@ class OllamaChatConfig:
        repeat_last_n: Optional[int] = None,
        repeat_penalty: Optional[float] = None,
        temperature: Optional[float] = None,
+       seed: Optional[int] = None,
        stop: Optional[list] = None,
        tfs_z: Optional[float] = None,
        num_predict: Optional[int] = None,
@ -130,6 +134,7 @@ class OllamaChatConfig:
            "stream",
            "top_p",
            "temperature",
+           "seed",
            "frequency_penalty",
            "stop",
            "tools",
@ -146,6 +151,8 @@ class OllamaChatConfig:
                optional_params["stream"] = value
            if param == "temperature":
                optional_params["temperature"] = value
+           if param == "seed":
+               optional_params["seed"] = value
            if param == "top_p":
                optional_params["top_p"] = value
            if param == "frequency_penalty":
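A hedged sketch of what the new `seed` mapping enables for Ollama chat models. The model name is illustrative and a local Ollama server at the default address is assumed:

```python
# With the hunks above, an OpenAI-style `seed` is forwarded to Ollama's options,
# so repeated calls with the same seed and prompt should produce the same text.
import litellm

def ask(seed: int) -> str:
    resp = litellm.completion(
        model="ollama_chat/llama3",  # any local Ollama model
        messages=[{"role": "user", "content": "Pick a random animal."}],
        seed=seed,
        temperature=0.7,
    )
    return resp.choices[0].message.content

print(ask(42) == ask(42))  # expected: True for a deterministic backend
```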
@ -21,11 +21,12 @@ from litellm.utils import (
    TranscriptionResponse,
    TextCompletionResponse,
)
-from typing import Callable, Optional
+from typing import Callable, Optional, Coroutine
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
from openai import OpenAI, AsyncOpenAI
from ..types.llms.openai import *
+import openai


class OpenAIError(Exception):
@ -349,7 +350,6 @@ class OpenAIConfig:
            "top_p",
            "tools",
            "tool_choice",
-           "user",
            "function_call",
            "functions",
            "max_retries",
@ -362,6 +362,12 @@ class OpenAIConfig:
        ):  # gpt-4 does not support 'response_format'
            model_specific_params.append("response_format")

+       if (
+           model in litellm.open_ai_chat_completion_models
+       ) or model in litellm.open_ai_text_completion_models:
+           model_specific_params.append(
+               "user"
+           )  # user is not a param supported by all openai-compatible endpoints - e.g. azure ai
        return base_params + model_specific_params

    def map_openai_params(
@ -1085,8 +1091,8 @@ class OpenAIChatCompletion(BaseLLM):
        model_response: TranscriptionResponse,
        timeout: float,
        max_retries: int,
-       api_key: Optional[str] = None,
-       api_base: Optional[str] = None,
+       api_key: Optional[str],
+       api_base: Optional[str],
        client=None,
        logging_obj=None,
        atranscription: bool = False,
@ -1142,7 +1148,6 @@ class OpenAIChatCompletion(BaseLLM):
        max_retries=None,
        logging_obj=None,
    ):
-       response = None
        try:
            if client is None:
                openai_aclient = AsyncOpenAI(
@ -1176,6 +1181,95 @@ class OpenAIChatCompletion(BaseLLM):
            )
            raise e

    def audio_speech(
        self,
        model: str,
        input: str,
        voice: str,
        optional_params: dict,
        api_key: Optional[str],
        api_base: Optional[str],
        organization: Optional[str],
        project: Optional[str],
        max_retries: int,
        timeout: Union[float, httpx.Timeout],
        aspeech: Optional[bool] = None,
        client=None,
    ) -> HttpxBinaryResponseContent:

        if aspeech is not None and aspeech == True:
            return self.async_audio_speech(
                model=model,
                input=input,
                voice=voice,
                optional_params=optional_params,
                api_key=api_key,
                api_base=api_base,
                organization=organization,
                project=project,
                max_retries=max_retries,
                timeout=timeout,
                client=client,
            )  # type: ignore

        if client is None:
            openai_client = OpenAI(
                api_key=api_key,
                base_url=api_base,
                organization=organization,
                project=project,
                http_client=litellm.client_session,
                timeout=timeout,
                max_retries=max_retries,
            )
        else:
            openai_client = client

        response = openai_client.audio.speech.create(
            model=model,
            voice=voice,  # type: ignore
            input=input,
            **optional_params,
        )
        return response

    async def async_audio_speech(
        self,
        model: str,
        input: str,
        voice: str,
        optional_params: dict,
        api_key: Optional[str],
        api_base: Optional[str],
        organization: Optional[str],
        project: Optional[str],
        max_retries: int,
        timeout: Union[float, httpx.Timeout],
        client=None,
    ) -> HttpxBinaryResponseContent:

        if client is None:
            openai_client = AsyncOpenAI(
                api_key=api_key,
                base_url=api_base,
                organization=organization,
                project=project,
                http_client=litellm.aclient_session,
                timeout=timeout,
                max_retries=max_retries,
            )
        else:
            openai_client = client

        response = await openai_client.audio.speech.create(
            model=model,
            voice=voice,  # type: ignore
            input=input,
            **optional_params,
        )

        return response

    async def ahealth_check(
        self,
        model: Optional[str],
@ -1497,6 +1591,322 @@ class OpenAITextCompletion(BaseLLM):
                yield transformed_chunk


class OpenAIFilesAPI(BaseLLM):
    """
    OpenAI methods to support for batches
    - create_file()
    - retrieve_file()
    - list_files()
    - delete_file()
    - file_content()
    - update_file()
    """

    def __init__(self) -> None:
        super().__init__()

    def get_openai_client(
        self,
        api_key: Optional[str],
        api_base: Optional[str],
        timeout: Union[float, httpx.Timeout],
        max_retries: Optional[int],
        organization: Optional[str],
        client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
        _is_async: bool = False,
    ) -> Optional[Union[OpenAI, AsyncOpenAI]]:
        received_args = locals()
        openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = None
        if client is None:
            data = {}
            for k, v in received_args.items():
                if k == "self" or k == "client" or k == "_is_async":
                    pass
                elif k == "api_base" and v is not None:
                    data["base_url"] = v
                elif v is not None:
                    data[k] = v
            if _is_async is True:
                openai_client = AsyncOpenAI(**data)
            else:
                openai_client = OpenAI(**data)  # type: ignore
        else:
            openai_client = client

        return openai_client

    async def acreate_file(
        self,
        create_file_data: CreateFileRequest,
        openai_client: AsyncOpenAI,
    ) -> FileObject:
        response = await openai_client.files.create(**create_file_data)
        return response

    def create_file(
        self,
        _is_async: bool,
        create_file_data: CreateFileRequest,
        api_base: str,
        api_key: Optional[str],
        timeout: Union[float, httpx.Timeout],
        max_retries: Optional[int],
        organization: Optional[str],
        client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
    ) -> Union[FileObject, Coroutine[Any, Any, FileObject]]:
        openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
            api_key=api_key,
            api_base=api_base,
            timeout=timeout,
            max_retries=max_retries,
            organization=organization,
            client=client,
            _is_async=_is_async,
        )
        if openai_client is None:
            raise ValueError(
                "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
            )

        if _is_async is True:
            if not isinstance(openai_client, AsyncOpenAI):
                raise ValueError(
                    "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
                )
            return self.acreate_file(  # type: ignore
                create_file_data=create_file_data, openai_client=openai_client
            )
        response = openai_client.files.create(**create_file_data)
        return response

    async def afile_content(
        self,
        file_content_request: FileContentRequest,
        openai_client: AsyncOpenAI,
    ) -> HttpxBinaryResponseContent:
        response = await openai_client.files.content(**file_content_request)
        return response

    def file_content(
        self,
        _is_async: bool,
        file_content_request: FileContentRequest,
        api_base: str,
        api_key: Optional[str],
        timeout: Union[float, httpx.Timeout],
        max_retries: Optional[int],
        organization: Optional[str],
        client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
    ) -> Union[
        HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]
    ]:
        openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
            api_key=api_key,
            api_base=api_base,
            timeout=timeout,
            max_retries=max_retries,
            organization=organization,
            client=client,
            _is_async=_is_async,
        )
        if openai_client is None:
            raise ValueError(
                "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
            )

        if _is_async is True:
            if not isinstance(openai_client, AsyncOpenAI):
                raise ValueError(
                    "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
                )
            return self.afile_content(  # type: ignore
                file_content_request=file_content_request,
                openai_client=openai_client,
            )
        response = openai_client.files.content(**file_content_request)

        return response


class OpenAIBatchesAPI(BaseLLM):
    """
    OpenAI methods to support for batches
    - create_batch()
    - retrieve_batch()
    - cancel_batch()
    - list_batch()
    """

    def __init__(self) -> None:
        super().__init__()

    def get_openai_client(
        self,
        api_key: Optional[str],
        api_base: Optional[str],
        timeout: Union[float, httpx.Timeout],
        max_retries: Optional[int],
        organization: Optional[str],
        client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
        _is_async: bool = False,
    ) -> Optional[Union[OpenAI, AsyncOpenAI]]:
        received_args = locals()
        openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = None
        if client is None:
            data = {}
            for k, v in received_args.items():
                if k == "self" or k == "client" or k == "_is_async":
                    pass
                elif k == "api_base" and v is not None:
                    data["base_url"] = v
                elif v is not None:
                    data[k] = v
            if _is_async is True:
                openai_client = AsyncOpenAI(**data)
            else:
                openai_client = OpenAI(**data)  # type: ignore
        else:
            openai_client = client

        return openai_client

    async def acreate_batch(
        self,
        create_batch_data: CreateBatchRequest,
        openai_client: AsyncOpenAI,
    ) -> Batch:
        response = await openai_client.batches.create(**create_batch_data)
        return response

    def create_batch(
        self,
        _is_async: bool,
        create_batch_data: CreateBatchRequest,
        api_key: Optional[str],
        api_base: Optional[str],
        timeout: Union[float, httpx.Timeout],
        max_retries: Optional[int],
        organization: Optional[str],
        client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
    ) -> Union[Batch, Coroutine[Any, Any, Batch]]:
        openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
            api_key=api_key,
            api_base=api_base,
            timeout=timeout,
            max_retries=max_retries,
            organization=organization,
            client=client,
            _is_async=_is_async,
        )
        if openai_client is None:
            raise ValueError(
                "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
            )

        if _is_async is True:
            if not isinstance(openai_client, AsyncOpenAI):
                raise ValueError(
                    "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
                )
            return self.acreate_batch(  # type: ignore
                create_batch_data=create_batch_data, openai_client=openai_client
            )
        response = openai_client.batches.create(**create_batch_data)
        return response

    async def aretrieve_batch(
        self,
        retrieve_batch_data: RetrieveBatchRequest,
        openai_client: AsyncOpenAI,
    ) -> Batch:
        response = await openai_client.batches.retrieve(**retrieve_batch_data)
        return response

    def retrieve_batch(
        self,
        _is_async: bool,
        retrieve_batch_data: RetrieveBatchRequest,
        api_key: Optional[str],
        api_base: Optional[str],
        timeout: Union[float, httpx.Timeout],
        max_retries: Optional[int],
        organization: Optional[str],
        client: Optional[OpenAI] = None,
    ):
        openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
            api_key=api_key,
            api_base=api_base,
            timeout=timeout,
            max_retries=max_retries,
            organization=organization,
            client=client,
            _is_async=_is_async,
        )
        if openai_client is None:
            raise ValueError(
                "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
            )

        if _is_async is True:
            if not isinstance(openai_client, AsyncOpenAI):
                raise ValueError(
                    "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
                )
            return self.aretrieve_batch(  # type: ignore
                retrieve_batch_data=retrieve_batch_data, openai_client=openai_client
            )
        response = openai_client.batches.retrieve(**retrieve_batch_data)
        return response

    def cancel_batch(
        self,
        _is_async: bool,
        cancel_batch_data: CancelBatchRequest,
        api_key: Optional[str],
        api_base: Optional[str],
        timeout: Union[float, httpx.Timeout],
        max_retries: Optional[int],
        organization: Optional[str],
        client: Optional[OpenAI] = None,
    ):
        openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
            api_key=api_key,
            api_base=api_base,
            timeout=timeout,
            max_retries=max_retries,
            organization=organization,
            client=client,
            _is_async=_is_async,
        )
        if openai_client is None:
            raise ValueError(
                "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
            )
        response = openai_client.batches.cancel(**cancel_batch_data)
        return response

    # def list_batch(
    #     self,
    #     list_batch_data: ListBatchRequest,
    #     api_key: Optional[str],
    #     api_base: Optional[str],
    #     timeout: Union[float, httpx.Timeout],
    #     max_retries: Optional[int],
    #     organization: Optional[str],
    #     client: Optional[OpenAI] = None,
    # ):
    #     openai_client: OpenAI = self.get_openai_client(
    #         api_key=api_key,
    #         api_base=api_base,
    #         timeout=timeout,
    #         max_retries=max_retries,
    #         organization=organization,
    #         client=client,
    #     )
    #     response = openai_client.batches.list(**list_batch_data)
    #     return response


class OpenAIAssistantsAPI(BaseLLM):
    def __init__(self) -> None:
        super().__init__()
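A hedged sketch of driving the new batches wrapper directly. The import path and the field names inside `create_batch_data` are assumptions based on OpenAI's /v1/batches API; they are not spelled out in this diff:

```python
# Hypothetical usage of OpenAIBatchesAPI.create_batch (synchronous path).
from litellm.llms.openai import OpenAIBatchesAPI  # assumed import path

batches_api = OpenAIBatchesAPI()
batch = batches_api.create_batch(
    _is_async=False,
    create_batch_data={                       # assumed CreateBatchRequest fields
        "input_file_id": "file-abc123",       # a previously uploaded .jsonl batch file
        "endpoint": "/v1/chat/completions",
        "completion_window": "24h",
    },
    api_key="sk-...",
    api_base="https://api.openai.com/v1",
    timeout=600.0,
    max_retries=2,
    organization=None,
)
print(batch.id, batch.status)
```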
154 litellm/main.py
@ -14,7 +14,6 @@ from functools import partial
import dotenv, traceback, random, asyncio, time, contextvars
from copy import deepcopy
import httpx
-
import litellm
from ._logging import verbose_logger
from litellm import (  # type: ignore
@ -92,6 +91,7 @@ import tiktoken
from concurrent.futures import ThreadPoolExecutor
from typing import Callable, List, Optional, Dict, Union, Mapping
from .caching import enable_cache, disable_cache, update_cache
+from .types.llms.openai import HttpxBinaryResponseContent

encoding = tiktoken.get_encoding("cl100k_base")
from litellm.utils import (
@ -680,6 +680,7 @@ def completion(
        "region_name",
        "allowed_model_region",
        "model_config",
+       "fastest_response",
    ]

    default_params = openai_params + litellm_params
@ -4130,6 +4131,24 @@ def transcription(
            max_retries=max_retries,
        )
    elif custom_llm_provider == "openai":
+       api_base = (
+           api_base
+           or litellm.api_base
+           or get_secret("OPENAI_API_BASE")
+           or "https://api.openai.com/v1"
+       )  # type: ignore
+       openai.organization = (
+           litellm.organization
+           or get_secret("OPENAI_ORGANIZATION")
+           or None  # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
+       )
+       # set API KEY
+       api_key = (
+           api_key
+           or litellm.api_key
+           or litellm.openai_key
+           or get_secret("OPENAI_API_KEY")
+       )  # type: ignore
        response = openai_chat_completions.audio_transcriptions(
            model=model,
            audio_file=file,
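For context, a minimal sketch of the call this change affects: an explicitly supplied `api_base`/`api_key` now reaches the OpenAI transcription handler instead of being dropped. Model name, file name, and base URL are illustrative:

```python
import litellm

with open("speech.wav", "rb") as audio:          # any local audio file
    transcript = litellm.transcription(
        model="whisper-1",
        file=audio,
        api_base="https://api.openai.com/v1",    # or an OpenAI-compatible proxy
        api_key="sk-...",
    )
print(transcript.text)
```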
@ -4139,6 +4158,139 @@ def transcription(
            timeout=timeout,
            logging_obj=litellm_logging_obj,
            max_retries=max_retries,
            api_base=api_base,
            api_key=api_key,
        )
        return response


@client
async def aspeech(*args, **kwargs) -> HttpxBinaryResponseContent:
    """
    Calls openai tts endpoints.
    """
    loop = asyncio.get_event_loop()
    model = args[0] if len(args) > 0 else kwargs["model"]
    ### PASS ARGS TO Image Generation ###
    kwargs["aspeech"] = True
    custom_llm_provider = kwargs.get("custom_llm_provider", None)
    try:
        # Use a partial function to pass your keyword arguments
        func = partial(speech, *args, **kwargs)

        # Add the context to the function
        ctx = contextvars.copy_context()
        func_with_context = partial(ctx.run, func)

        _, custom_llm_provider, _, _ = get_llm_provider(
            model=model, api_base=kwargs.get("api_base", None)
        )

        # Await normally
        init_response = await loop.run_in_executor(None, func_with_context)
        if asyncio.iscoroutine(init_response):
            response = await init_response
        else:
            # Call the synchronous function using run_in_executor
            response = await loop.run_in_executor(None, func_with_context)
        return response  # type: ignore
    except Exception as e:
        custom_llm_provider = custom_llm_provider or "openai"
        raise exception_type(
            model=model,
            custom_llm_provider=custom_llm_provider,
            original_exception=e,
            completion_kwargs=args,
            extra_kwargs=kwargs,
        )


@client
def speech(
    model: str,
    input: str,
    voice: str,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    organization: Optional[str] = None,
    project: Optional[str] = None,
    max_retries: Optional[int] = None,
    metadata: Optional[dict] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    response_format: Optional[str] = None,
    speed: Optional[int] = None,
    client=None,
    headers: Optional[dict] = None,
    custom_llm_provider: Optional[str] = None,
    aspeech: Optional[bool] = None,
    **kwargs,
) -> HttpxBinaryResponseContent:

    model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base)  # type: ignore

    optional_params = {}
    if response_format is not None:
        optional_params["response_format"] = response_format
    if speed is not None:
        optional_params["speed"] = speed  # type: ignore

    if timeout is None:
        timeout = litellm.request_timeout

    if max_retries is None:
        max_retries = litellm.num_retries or openai.DEFAULT_MAX_RETRIES
    response: Optional[HttpxBinaryResponseContent] = None
    if custom_llm_provider == "openai":
        api_base = (
            api_base  # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
            or litellm.api_base
            or get_secret("OPENAI_API_BASE")
            or "https://api.openai.com/v1"
        )  # type: ignore
        # set API KEY
        api_key = (
            api_key
            or litellm.api_key  # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
            or litellm.openai_key
            or get_secret("OPENAI_API_KEY")
        )  # type: ignore

        organization = (
            organization
            or litellm.organization
            or get_secret("OPENAI_ORGANIZATION")
            or None  # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
        )  # type: ignore

        project = (
            project
            or litellm.project
            or get_secret("OPENAI_PROJECT")
            or None  # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
        )  # type: ignore

        headers = headers or litellm.headers

        response = openai_chat_completions.audio_speech(
            model=model,
            input=input,
            voice=voice,
            optional_params=optional_params,
            api_key=api_key,
            api_base=api_base,
            organization=organization,
            project=project,
            max_retries=max_retries,
            timeout=timeout,
            client=client,  # pass AsyncOpenAI, OpenAI client
            aspeech=aspeech,
        )

    if response is None:
        raise Exception(
            "Unable to map the custom llm provider={} to a known provider={}.".format(
                custom_llm_provider, litellm.provider_list
            )
        )
    return response
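A minimal sketch of calling the new `speech()` entrypoint end to end. The model string and voice are illustrative, and `OPENAI_API_KEY` in the environment is assumed:

```python
import litellm

response = litellm.speech(
    model="openai/tts-1",
    voice="alloy",
    input="litellm supports text to speech now",
)
# The return type is HttpxBinaryResponseContent, which can stream straight to disk.
response.stream_to_file("speech.mp3")
```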
@ -1265,8 +1265,8 @@
        "max_tokens": 4096,
        "max_input_tokens": 200000,
        "max_output_tokens": 4096,
-       "input_cost_per_token": 0.0000015,
-       "output_cost_per_token": 0.0000075,
+       "input_cost_per_token": 0.000015,
+       "output_cost_per_token": 0.000075,
        "litellm_provider": "vertex_ai-anthropic_models",
        "mode": "chat",
        "supports_function_calling": true,
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1 @@
+(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[418],{33786:function(e,n,u){Promise.resolve().then(u.bind(u,87494))},87494:function(e,n,u){"use strict";u.r(n),u.d(n,{default:function(){return f}});var t=u(3827),s=u(64090),r=u(47907),c=u(41134);function f(){let e=(0,r.useSearchParams)().get("key"),[n,u]=(0,s.useState)(null);return(0,s.useEffect)(()=>{e&&u(e)},[e]),(0,t.jsx)(c.Z,{accessToken:n,publicPage:!0,premiumUser:!1})}}},function(e){e.O(0,[359,134,971,69,744],function(){return e(e.s=33786)}),_N_E=e.O()}]);
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1 +1 @@
File diff suppressed because one or more lines are too long (minified Next.js webpack runtime; the visible change is the hashed stylesheet returned by d.miniCssF: static/css/5d93d4a9fa59d72f.css becomes static/css/33354d8285fe572e.css)
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1 +1 @@
File diff suppressed because one or more lines are too long (pre-rendered LiteLLM Dashboard HTML shell; only hashed asset references change: webpack-e85084d25f9ae5e4.js becomes webpack-766a329236c9a3f0.js, css/5d93d4a9fa59d72f.css becomes css/33354d8285fe572e.css, the page chunk list is updated, and the buildId changes from D_ZUmMtLMPSa4aQQUJtKt to PcGFjo5-03lHREJ3E0k6y)
@ -1,7 +1,7 @@
|
||||||
2:I[77831,[],""]
|
2:I[77831,[],""]
|
||||||
3:I[39712,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","608","static/chunks/608-d128caa3cfe973c1.js","931","static/chunks/app/page-76d278f96a0e9768.js"],""]
|
3:I[45014,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","359","static/chunks/359-f105a7fb61fe8110.js","440","static/chunks/440-b9a05f116e1a696d.js","134","static/chunks/134-4a7b43f992182f2c.js","931","static/chunks/app/page-f610596e5fb3cce4.js"],""]
|
||||||
4:I[5613,[],""]
|
4:I[5613,[],""]
|
||||||
5:I[31778,[],""]
|
5:I[31778,[],""]
|
||||||
0:["D_ZUmMtLMPSa4aQQUJtKt",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5d93d4a9fa59d72f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
0:["PcGFjo5-03lHREJ3E0k6y",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/33354d8285fe572e.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
1:null
|
1:null
|
||||||
|
|
1
litellm/proxy/_experimental/out/model_hub.html
Normal file
File diff suppressed because one or more lines are too long
7
litellm/proxy/_experimental/out/model_hub.txt
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
2:I[77831,[],""]
|
||||||
|
3:I[87494,["359","static/chunks/359-f105a7fb61fe8110.js","134","static/chunks/134-4a7b43f992182f2c.js","418","static/chunks/app/model_hub/page-aa3c10cf9bb31255.js"],""]
|
||||||
|
4:I[5613,[],""]
|
||||||
|
5:I[31778,[],""]
|
||||||
|
0:["PcGFjo5-03lHREJ3E0k6y",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/33354d8285fe572e.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||||
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
|
1:null
|
|
@ -1,42 +1,16 @@
|
||||||
general_settings:
|
|
||||||
alert_to_webhook_url:
|
|
||||||
budget_alerts: https://hooks.slack.com/services/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
|
|
||||||
daily_reports: https://hooks.slack.com/services/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
|
|
||||||
db_exceptions: https://hooks.slack.com/services/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
|
|
||||||
llm_exceptions: https://hooks.slack.com/services/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
|
|
||||||
llm_requests_hanging: https://hooks.slack.com/services/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
|
|
||||||
llm_too_slow: https://hooks.slack.com/services/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
|
|
||||||
outage_alerts: https://hooks.slack.com/services/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
|
|
||||||
alert_types:
|
|
||||||
- llm_exceptions
|
|
||||||
- llm_too_slow
|
|
||||||
- llm_requests_hanging
|
|
||||||
- budget_alerts
|
|
||||||
- db_exceptions
|
|
||||||
- daily_reports
|
|
||||||
- spend_reports
|
|
||||||
- cooldown_deployment
|
|
||||||
- new_model_added
|
|
||||||
- outage_alerts
|
|
||||||
alerting:
|
|
||||||
- slack
|
|
||||||
database_connection_pool_limit: 100
|
|
||||||
database_connection_timeout: 60
|
|
||||||
health_check_interval: 300
|
|
||||||
ui_access_mode: all
|
|
||||||
# litellm_settings:
|
|
||||||
# json_logs: true
|
|
||||||
model_list:
|
model_list:
|
||||||
- litellm_params:
|
- litellm_params:
|
||||||
api_base: http://0.0.0.0:8080
|
api_base: http://0.0.0.0:8080
|
||||||
api_key: ''
|
api_key: ''
|
||||||
model: openai/my-fake-model
|
model: openai/my-fake-model
|
||||||
|
rpm: 800
|
||||||
model_name: gpt-3.5-turbo-fake-model
|
model_name: gpt-3.5-turbo-fake-model
|
||||||
- litellm_params:
|
- litellm_params:
|
||||||
api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
|
api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
|
||||||
api_key: os.environ/AZURE_EUROPE_API_KEY
|
api_key: os.environ/AZURE_EUROPE_API_KEY
|
||||||
model: azure/gpt-35-turbo
|
model: azure/gpt-35-turbo
|
||||||
model_name: gpt-3.5-turbo
|
rpm: 10
|
||||||
|
model_name: gpt-3.5-turbo-fake-model
|
||||||
- litellm_params:
|
- litellm_params:
|
||||||
api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
|
api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
|
||||||
api_key: os.environ/AZURE_API_KEY
|
api_key: os.environ/AZURE_API_KEY
|
||||||
|
@ -52,5 +26,8 @@ model_list:
|
||||||
api_version: '2023-05-15'
|
api_version: '2023-05-15'
|
||||||
model: azure/chatgpt-v-2
|
model: azure/chatgpt-v-2
|
||||||
model_name: gpt-3.5-turbo
|
model_name: gpt-3.5-turbo
|
||||||
|
- model_name: tts
|
||||||
|
litellm_params:
|
||||||
|
model: openai/tts-1
|
||||||
router_settings:
|
router_settings:
|
||||||
enable_pre_call_checks: true
|
enable_pre_call_checks: true
|
||||||
|
|
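For reference, a minimal sketch of the same deployment set driven directly from Python instead of the proxy config; the model names, endpoints, rpm values and `enable_pre_call_checks` are copied from the YAML above, the env var read mirrors the `os.environ/...` reference, and the remaining Azure deployment is omitted for brevity.

```python
import os
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo-fake-model",
            "litellm_params": {
                "model": "openai/my-fake-model",
                "api_base": "http://0.0.0.0:8080",
                "api_key": "",
                "rpm": 800,
            },
        },
        {
            "model_name": "gpt-3.5-turbo-fake-model",
            "litellm_params": {
                "model": "azure/gpt-35-turbo",
                "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com/",
                "api_key": os.environ["AZURE_EUROPE_API_KEY"],
                "rpm": 10,
            },
        },
        {
            "model_name": "tts",
            "litellm_params": {"model": "openai/tts-1"},
        },
    ],
    enable_pre_call_checks=True,  # router_settings from the config above
)
```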
|
@ -1,4 +1,4 @@
|
||||||
from pydantic import BaseModel, Extra, Field, root_validator, Json, validator
|
from pydantic import BaseModel, Extra, Field, model_validator, Json, ConfigDict
|
||||||
from dataclasses import fields
|
from dataclasses import fields
|
||||||
import enum
|
import enum
|
||||||
from typing import Optional, List, Union, Dict, Literal, Any
|
from typing import Optional, List, Union, Dict, Literal, Any
|
||||||
|
@ -7,6 +7,75 @@ import uuid, json, sys, os
|
||||||
from litellm.types.router import UpdateRouterConfig
|
from litellm.types.router import UpdateRouterConfig
|
||||||
from litellm.types.utils import ProviderField
|
from litellm.types.utils import ProviderField
|
||||||
|
|
||||||
|
|
||||||
|
class LitellmUserRoles(str, enum.Enum):
|
||||||
|
"""
|
||||||
|
Admin Roles:
|
||||||
|
PROXY_ADMIN: admin over the platform
|
||||||
|
PROXY_ADMIN_VIEW_ONLY: can login, view all own keys, view all spend
|
||||||
|
|
||||||
|
Internal User Roles:
|
||||||
|
INTERNAL_USER: can login, view/create/delete their own keys, view their spend
|
||||||
|
INTERNAL_USER_VIEW_ONLY: can login, view their own keys, view their own spend
|
||||||
|
|
||||||
|
|
||||||
|
Team Roles:
|
||||||
|
TEAM: used for JWT auth
|
||||||
|
|
||||||
|
|
||||||
|
Customer Roles:
|
||||||
|
CUSTOMER: External users -> these are customers
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Admin Roles
|
||||||
|
PROXY_ADMIN = "proxy_admin"
|
||||||
|
PROXY_ADMIN_VIEW_ONLY = "proxy_admin_viewer"
|
||||||
|
|
||||||
|
# Internal User Roles
|
||||||
|
INTERNAL_USER = "internal_user"
|
||||||
|
INTERNAL_USER_VIEW_ONLY = "internal_user_viewer"
|
||||||
|
|
||||||
|
# Team Roles
|
||||||
|
TEAM = "team"
|
||||||
|
|
||||||
|
# Customer Roles - External users of proxy
|
||||||
|
CUSTOMER = "customer"
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return str(self.value)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def description(self):
|
||||||
|
"""
|
||||||
|
Descriptions for the enum values
|
||||||
|
"""
|
||||||
|
descriptions = {
|
||||||
|
"proxy_admin": "admin over litellm proxy, has all permissions",
|
||||||
|
"proxy_admin_viewer": "view all keys, view all spend",
|
||||||
|
"internal_user": "view/create/delete their own keys, view their own spend",
|
||||||
|
"internal_user_viewer": "view their own keys, view their own spend",
|
||||||
|
"team": "team scope used for JWT auth",
|
||||||
|
"customer": "customer",
|
||||||
|
}
|
||||||
|
return descriptions.get(self.value, "")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def ui_label(self):
|
||||||
|
"""
|
||||||
|
UI labels for the enum values
|
||||||
|
"""
|
||||||
|
ui_labels = {
|
||||||
|
"proxy_admin": "Admin (All Permissions)",
|
||||||
|
"proxy_admin_viewer": "Admin (View Only)",
|
||||||
|
"internal_user": "Internal User (Create/Delete/View)",
|
||||||
|
"internal_user_viewer": "Internal User (View Only)",
|
||||||
|
"team": "Team",
|
||||||
|
"customer": "Customer",
|
||||||
|
}
|
||||||
|
return ui_labels.get(self.value, "")
|
||||||
|
|
||||||
|
|
||||||
AlertType = Literal[
|
AlertType = Literal[
|
||||||
"llm_exceptions",
|
"llm_exceptions",
|
||||||
"llm_too_slow",
|
"llm_too_slow",
|
||||||
|
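A quick sketch of how the new role enum behaves: it is a `str`-valued enum, so comparisons against the raw role strings stored in the DB keep working, and `description` / `ui_label` feed the admin UI.

```python
from litellm.proxy._types import LitellmUserRoles

role = LitellmUserRoles.INTERNAL_USER

print(str(role))         # "internal_user"
print(role.description)  # "view/create/delete their own keys, view their own spend"
print(role.ui_label)     # "Internal User (Create/Delete/View)"

# str-valued enum: plain-string comparisons still work
assert role == "internal_user"
assert LitellmUserRoles.PROXY_ADMIN.value == "proxy_admin"
```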
@ -50,8 +119,7 @@ class LiteLLMBase(BaseModel):
|
||||||
# if using pydantic v1
|
# if using pydantic v1
|
||||||
return self.__fields_set__
|
return self.__fields_set__
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(protected_namespaces=())
|
||||||
protected_namespaces = ()
|
|
||||||
|
|
||||||
|
|
||||||
class LiteLLM_UpperboundKeyGenerateParams(LiteLLMBase):
|
class LiteLLM_UpperboundKeyGenerateParams(LiteLLMBase):
|
||||||
|
@ -99,6 +167,14 @@ class LiteLLMRoutes(enum.Enum):
|
||||||
# moderations
|
# moderations
|
||||||
"/moderations",
|
"/moderations",
|
||||||
"/v1/moderations",
|
"/v1/moderations",
|
||||||
|
# batches
|
||||||
|
"/v1/batches",
|
||||||
|
"/batches",
|
||||||
|
"/v1/batches{batch_id}",
|
||||||
|
"/batches{batch_id}",
|
||||||
|
# files
|
||||||
|
"/v1/files",
|
||||||
|
"/files",
|
||||||
# models
|
# models
|
||||||
"/models",
|
"/models",
|
||||||
"/v1/models",
|
"/v1/models",
|
||||||
|
@ -272,7 +348,8 @@ class LiteLLMPromptInjectionParams(LiteLLMBase):
|
||||||
description="Return rejected request error message as a string to the user. Default behaviour is to raise an exception.",
|
description="Return rejected request error message as a string to the user. Default behaviour is to raise an exception.",
|
||||||
)
|
)
|
||||||
|
|
||||||
@root_validator(pre=True)
|
@model_validator(mode="before")
|
||||||
|
@classmethod
|
||||||
def check_llm_api_params(cls, values):
|
def check_llm_api_params(cls, values):
|
||||||
llm_api_check = values.get("llm_api_check")
|
llm_api_check = values.get("llm_api_check")
|
||||||
if llm_api_check is True:
|
if llm_api_check is True:
|
||||||
|
@ -330,8 +407,7 @@ class ProxyChatCompletionRequest(LiteLLMBase):
|
||||||
deployment_id: Optional[str] = None
|
deployment_id: Optional[str] = None
|
||||||
request_timeout: Optional[int] = None
|
request_timeout: Optional[int] = None
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(extra="allow") # allow params not defined here, these fall in litellm.completion(**kwargs)
|
||||||
extra = "allow" # allow params not defined here, these fall in litellm.completion(**kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
class ModelInfoDelete(LiteLLMBase):
|
class ModelInfoDelete(LiteLLMBase):
|
||||||
|
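The recurring change in this file is the pydantic v1 to v2 idiom: the nested `class Config` becomes `model_config = ConfigDict(...)`, and `@root_validator(pre=True)` becomes `@model_validator(mode="before")` on a classmethod. A minimal sketch of the pattern; the model and field names here are illustrative, not from the codebase.

```python
from typing import Optional
from pydantic import BaseModel, ConfigDict, model_validator


class ExampleTable(BaseModel):  # hypothetical model, for illustration only
    # v2 replacement for `class Config: protected_namespaces = (); extra = "allow"`
    model_config = ConfigDict(protected_namespaces=(), extra="allow")

    model_id: Optional[str] = None

    # v2 replacement for `@root_validator(pre=True)`
    @model_validator(mode="before")
    @classmethod
    def set_defaults(cls, values):
        if values.get("model_id") is None:
            values.update({"model_id": "generated-id"})
        return values
```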
@ -358,11 +434,10 @@ class ModelInfo(LiteLLMBase):
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(protected_namespaces=(), extra="allow")
|
||||||
extra = Extra.allow # Allow extra fields
|
|
||||||
protected_namespaces = ()
|
|
||||||
|
|
||||||
@root_validator(pre=True)
|
@model_validator(mode="before")
|
||||||
|
@classmethod
|
||||||
def set_model_info(cls, values):
|
def set_model_info(cls, values):
|
||||||
if values.get("id") is None:
|
if values.get("id") is None:
|
||||||
values.update({"id": str(uuid.uuid4())})
|
values.update({"id": str(uuid.uuid4())})
|
||||||
|
@ -393,10 +468,10 @@ class ModelParams(LiteLLMBase):
|
||||||
litellm_params: dict
|
litellm_params: dict
|
||||||
model_info: ModelInfo
|
model_info: ModelInfo
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(protected_namespaces=())
|
||||||
protected_namespaces = ()
|
|
||||||
|
|
||||||
@root_validator(pre=True)
|
@model_validator(mode="before")
|
||||||
|
@classmethod
|
||||||
def set_model_info(cls, values):
|
def set_model_info(cls, values):
|
||||||
if values.get("model_info") is None:
|
if values.get("model_info") is None:
|
||||||
values.update({"model_info": ModelInfo()})
|
values.update({"model_info": ModelInfo()})
|
||||||
|
@ -432,8 +507,7 @@ class GenerateKeyRequest(GenerateRequestBase):
|
||||||
{}
|
{}
|
||||||
) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
|
) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(protected_namespaces=())
|
||||||
protected_namespaces = ()
|
|
||||||
|
|
||||||
|
|
||||||
class GenerateKeyResponse(GenerateKeyRequest):
|
class GenerateKeyResponse(GenerateKeyRequest):
|
||||||
|
@ -443,7 +517,8 @@ class GenerateKeyResponse(GenerateKeyRequest):
|
||||||
user_id: Optional[str] = None
|
user_id: Optional[str] = None
|
||||||
token_id: Optional[str] = None
|
token_id: Optional[str] = None
|
||||||
|
|
||||||
@root_validator(pre=True)
|
@model_validator(mode="before")
|
||||||
|
@classmethod
|
||||||
def set_model_info(cls, values):
|
def set_model_info(cls, values):
|
||||||
if values.get("token") is not None:
|
if values.get("token") is not None:
|
||||||
values.update({"key": values.get("token")})
|
values.update({"key": values.get("token")})
|
||||||
|
@ -483,14 +558,22 @@ class LiteLLM_ModelTable(LiteLLMBase):
|
||||||
created_by: str
|
created_by: str
|
||||||
updated_by: str
|
updated_by: str
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(protected_namespaces=())
|
||||||
protected_namespaces = ()
|
|
||||||
|
|
||||||
|
|
||||||
class NewUserRequest(GenerateKeyRequest):
|
class NewUserRequest(GenerateKeyRequest):
|
||||||
max_budget: Optional[float] = None
|
max_budget: Optional[float] = None
|
||||||
user_email: Optional[str] = None
|
user_email: Optional[str] = None
|
||||||
user_role: Optional[str] = None
|
user_role: Optional[
|
||||||
|
Literal[
|
||||||
|
LitellmUserRoles.PROXY_ADMIN,
|
||||||
|
LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY,
|
||||||
|
LitellmUserRoles.INTERNAL_USER,
|
||||||
|
LitellmUserRoles.INTERNAL_USER_VIEW_ONLY,
|
||||||
|
LitellmUserRoles.TEAM,
|
||||||
|
LitellmUserRoles.CUSTOMER,
|
||||||
|
]
|
||||||
|
] = None
|
||||||
teams: Optional[list] = None
|
teams: Optional[list] = None
|
||||||
organization_id: Optional[str] = None
|
organization_id: Optional[str] = None
|
||||||
auto_create_key: bool = (
|
auto_create_key: bool = (
|
||||||
|
@ -509,10 +592,20 @@ class UpdateUserRequest(GenerateRequestBase):
|
||||||
user_email: Optional[str] = None
|
user_email: Optional[str] = None
|
||||||
spend: Optional[float] = None
|
spend: Optional[float] = None
|
||||||
metadata: Optional[dict] = None
|
metadata: Optional[dict] = None
|
||||||
user_role: Optional[str] = None
|
user_role: Optional[
|
||||||
|
Literal[
|
||||||
|
LitellmUserRoles.PROXY_ADMIN,
|
||||||
|
LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY,
|
||||||
|
LitellmUserRoles.INTERNAL_USER,
|
||||||
|
LitellmUserRoles.INTERNAL_USER_VIEW_ONLY,
|
||||||
|
LitellmUserRoles.TEAM,
|
||||||
|
LitellmUserRoles.CUSTOMER,
|
||||||
|
]
|
||||||
|
] = None
|
||||||
max_budget: Optional[float] = None
|
max_budget: Optional[float] = None
|
||||||
|
|
||||||
@root_validator(pre=True)
|
@model_validator(mode="before")
|
||||||
|
@classmethod
|
||||||
def check_user_info(cls, values):
|
def check_user_info(cls, values):
|
||||||
if values.get("user_id") is None and values.get("user_email") is None:
|
if values.get("user_id") is None and values.get("user_email") is None:
|
||||||
raise ValueError("Either user id or user email must be provided")
|
raise ValueError("Either user id or user email must be provided")
|
||||||
|
@ -536,7 +629,8 @@ class NewCustomerRequest(LiteLLMBase):
|
||||||
None # if no equivalent model in allowed region - default all requests to this model
|
None # if no equivalent model in allowed region - default all requests to this model
|
||||||
)
|
)
|
||||||
|
|
||||||
@root_validator(pre=True)
|
@model_validator(mode="before")
|
||||||
|
@classmethod
|
||||||
def check_user_info(cls, values):
|
def check_user_info(cls, values):
|
||||||
if values.get("max_budget") is not None and values.get("budget_id") is not None:
|
if values.get("max_budget") is not None and values.get("budget_id") is not None:
|
||||||
raise ValueError("Set either 'max_budget' or 'budget_id', not both.")
|
raise ValueError("Set either 'max_budget' or 'budget_id', not both.")
|
||||||
|
@ -576,7 +670,8 @@ class Member(LiteLLMBase):
|
||||||
user_id: Optional[str] = None
|
user_id: Optional[str] = None
|
||||||
user_email: Optional[str] = None
|
user_email: Optional[str] = None
|
||||||
|
|
||||||
@root_validator(pre=True)
|
@model_validator(mode="before")
|
||||||
|
@classmethod
|
||||||
def check_user_info(cls, values):
|
def check_user_info(cls, values):
|
||||||
if values.get("user_id") is None and values.get("user_email") is None:
|
if values.get("user_id") is None and values.get("user_email") is None:
|
||||||
raise ValueError("Either user id or user email must be provided")
|
raise ValueError("Either user id or user email must be provided")
|
||||||
|
@ -605,8 +700,7 @@ class TeamBase(LiteLLMBase):
|
||||||
class NewTeamRequest(TeamBase):
|
class NewTeamRequest(TeamBase):
|
||||||
model_aliases: Optional[dict] = None
|
model_aliases: Optional[dict] = None
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(protected_namespaces=())
|
||||||
protected_namespaces = ()
|
|
||||||
|
|
||||||
|
|
||||||
class GlobalEndUsersSpend(LiteLLMBase):
|
class GlobalEndUsersSpend(LiteLLMBase):
|
||||||
|
@ -626,7 +720,8 @@ class TeamMemberDeleteRequest(LiteLLMBase):
|
||||||
user_id: Optional[str] = None
|
user_id: Optional[str] = None
|
||||||
user_email: Optional[str] = None
|
user_email: Optional[str] = None
|
||||||
|
|
||||||
@root_validator(pre=True)
|
@model_validator(mode="before")
|
||||||
|
@classmethod
|
||||||
def check_user_info(cls, values):
|
def check_user_info(cls, values):
|
||||||
if values.get("user_id") is None and values.get("user_email") is None:
|
if values.get("user_id") is None and values.get("user_email") is None:
|
||||||
raise ValueError("Either user id or user email must be provided")
|
raise ValueError("Either user id or user email must be provided")
|
||||||
|
@ -692,10 +787,10 @@ class LiteLLM_TeamTable(TeamBase):
|
||||||
budget_reset_at: Optional[datetime] = None
|
budget_reset_at: Optional[datetime] = None
|
||||||
model_id: Optional[int] = None
|
model_id: Optional[int] = None
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(protected_namespaces=())
|
||||||
protected_namespaces = ()
|
|
||||||
|
|
||||||
@root_validator(pre=True)
|
@model_validator(mode="before")
|
||||||
|
@classmethod
|
||||||
def set_model_info(cls, values):
|
def set_model_info(cls, values):
|
||||||
dict_fields = [
|
dict_fields = [
|
||||||
"metadata",
|
"metadata",
|
||||||
|
@ -731,8 +826,7 @@ class LiteLLM_BudgetTable(LiteLLMBase):
|
||||||
model_max_budget: Optional[dict] = None
|
model_max_budget: Optional[dict] = None
|
||||||
budget_duration: Optional[str] = None
|
budget_duration: Optional[str] = None
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(protected_namespaces=())
|
||||||
protected_namespaces = ()
|
|
||||||
|
|
||||||
|
|
||||||
class LiteLLM_TeamMemberTable(LiteLLM_BudgetTable):
|
class LiteLLM_TeamMemberTable(LiteLLM_BudgetTable):
|
||||||
|
@ -745,8 +839,7 @@ class LiteLLM_TeamMemberTable(LiteLLM_BudgetTable):
|
||||||
team_id: Optional[str] = None
|
team_id: Optional[str] = None
|
||||||
budget_id: Optional[str] = None
|
budget_id: Optional[str] = None
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(protected_namespaces=())
|
||||||
protected_namespaces = ()
|
|
||||||
|
|
||||||
|
|
||||||
class NewOrganizationRequest(LiteLLM_BudgetTable):
|
class NewOrganizationRequest(LiteLLM_BudgetTable):
|
||||||
|
@ -825,8 +918,7 @@ class KeyManagementSettings(LiteLLMBase):
|
||||||
class TeamDefaultSettings(LiteLLMBase):
|
class TeamDefaultSettings(LiteLLMBase):
|
||||||
team_id: str
|
team_id: str
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(extra="allow") # allow params not defined here, these fall in litellm.completion(**kwargs)
|
||||||
extra = "allow" # allow params not defined here, these fall in litellm.completion(**kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
class DynamoDBArgs(LiteLLMBase):
|
class DynamoDBArgs(LiteLLMBase):
|
||||||
|
@ -988,8 +1080,7 @@ class ConfigYAML(LiteLLMBase):
|
||||||
description="litellm router object settings. See router.py __init__ for all, example router.num_retries=5, router.timeout=5, router.max_retries=5, router.retry_after=5",
|
description="litellm router object settings. See router.py __init__ for all, example router.num_retries=5, router.timeout=5, router.max_retries=5, router.retry_after=5",
|
||||||
)
|
)
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(protected_namespaces=())
|
||||||
protected_namespaces = ()
|
|
||||||
|
|
||||||
|
|
||||||
class LiteLLM_VerificationToken(LiteLLMBase):
|
class LiteLLM_VerificationToken(LiteLLMBase):
|
||||||
|
@ -1019,9 +1110,7 @@ class LiteLLM_VerificationToken(LiteLLMBase):
|
||||||
|
|
||||||
org_id: Optional[str] = None # org id for a given key
|
org_id: Optional[str] = None # org id for a given key
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(protected_namespaces=())
|
||||||
protected_namespaces = ()
|
|
||||||
|
|
||||||
|
|
||||||
class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken):
|
class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken):
|
||||||
"""
|
"""
|
||||||
|
@ -1043,6 +1132,7 @@ class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken):
|
||||||
end_user_id: Optional[str] = None
|
end_user_id: Optional[str] = None
|
||||||
end_user_tpm_limit: Optional[int] = None
|
end_user_tpm_limit: Optional[int] = None
|
||||||
end_user_rpm_limit: Optional[int] = None
|
end_user_rpm_limit: Optional[int] = None
|
||||||
|
end_user_max_budget: Optional[float] = None
|
||||||
|
|
||||||
|
|
||||||
class UserAPIKeyAuth(
|
class UserAPIKeyAuth(
|
||||||
|
@ -1053,10 +1143,20 @@ class UserAPIKeyAuth(
|
||||||
"""
|
"""
|
||||||
|
|
||||||
api_key: Optional[str] = None
|
api_key: Optional[str] = None
|
||||||
user_role: Optional[Literal["proxy_admin", "app_owner", "app_user"]] = None
|
user_role: Optional[
|
||||||
|
Literal[
|
||||||
|
LitellmUserRoles.PROXY_ADMIN,
|
||||||
|
LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY,
|
||||||
|
LitellmUserRoles.INTERNAL_USER,
|
||||||
|
LitellmUserRoles.INTERNAL_USER_VIEW_ONLY,
|
||||||
|
LitellmUserRoles.TEAM,
|
||||||
|
LitellmUserRoles.CUSTOMER,
|
||||||
|
]
|
||||||
|
] = None
|
||||||
allowed_model_region: Optional[Literal["eu"]] = None
|
allowed_model_region: Optional[Literal["eu"]] = None
|
||||||
|
|
||||||
@root_validator(pre=True)
|
@model_validator(mode="before")
|
||||||
|
@classmethod
|
||||||
def check_api_key(cls, values):
|
def check_api_key(cls, values):
|
||||||
if values.get("api_key") is not None:
|
if values.get("api_key") is not None:
|
||||||
values.update({"token": hash_token(values.get("api_key"))})
|
values.update({"token": hash_token(values.get("api_key"))})
|
||||||
|
@ -1083,7 +1183,8 @@ class LiteLLM_UserTable(LiteLLMBase):
|
||||||
tpm_limit: Optional[int] = None
|
tpm_limit: Optional[int] = None
|
||||||
rpm_limit: Optional[int] = None
|
rpm_limit: Optional[int] = None
|
||||||
|
|
||||||
@root_validator(pre=True)
|
@model_validator(mode="before")
|
||||||
|
@classmethod
|
||||||
def set_model_info(cls, values):
|
def set_model_info(cls, values):
|
||||||
if values.get("spend") is None:
|
if values.get("spend") is None:
|
||||||
values.update({"spend": 0.0})
|
values.update({"spend": 0.0})
|
||||||
|
@ -1091,8 +1192,7 @@ class LiteLLM_UserTable(LiteLLMBase):
|
||||||
values.update({"models": []})
|
values.update({"models": []})
|
||||||
return values
|
return values
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(protected_namespaces=())
|
||||||
protected_namespaces = ()
|
|
||||||
|
|
||||||
|
|
||||||
class LiteLLM_EndUserTable(LiteLLMBase):
|
class LiteLLM_EndUserTable(LiteLLMBase):
|
||||||
|
@ -1104,14 +1204,14 @@ class LiteLLM_EndUserTable(LiteLLMBase):
|
||||||
default_model: Optional[str] = None
|
default_model: Optional[str] = None
|
||||||
litellm_budget_table: Optional[LiteLLM_BudgetTable] = None
|
litellm_budget_table: Optional[LiteLLM_BudgetTable] = None
|
||||||
|
|
||||||
@root_validator(pre=True)
|
@model_validator(mode="before")
|
||||||
|
@classmethod
|
||||||
def set_model_info(cls, values):
|
def set_model_info(cls, values):
|
||||||
if values.get("spend") is None:
|
if values.get("spend") is None:
|
||||||
values.update({"spend": 0.0})
|
values.update({"spend": 0.0})
|
||||||
return values
|
return values
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(protected_namespaces=())
|
||||||
protected_namespaces = ()
|
|
||||||
|
|
||||||
|
|
||||||
class LiteLLM_SpendLogs(LiteLLMBase):
|
class LiteLLM_SpendLogs(LiteLLMBase):
|
||||||
|
@ -1170,6 +1270,7 @@ class CallInfo(LiteLLMBase):
|
||||||
spend: float
|
spend: float
|
||||||
max_budget: Optional[float] = None
|
max_budget: Optional[float] = None
|
||||||
token: str = Field(description="Hashed value of that key")
|
token: str = Field(description="Hashed value of that key")
|
||||||
|
customer_id: Optional[str] = None
|
||||||
user_id: Optional[str] = None
|
user_id: Optional[str] = None
|
||||||
team_id: Optional[str] = None
|
team_id: Optional[str] = None
|
||||||
user_email: Optional[str] = None
|
user_email: Optional[str] = None
|
||||||
|
@ -1180,9 +1281,13 @@ class CallInfo(LiteLLMBase):
|
||||||
|
|
||||||
class WebhookEvent(CallInfo):
|
class WebhookEvent(CallInfo):
|
||||||
event: Literal[
|
event: Literal[
|
||||||
"budget_crossed", "threshold_crossed", "projected_limit_exceeded", "key_created"
|
"budget_crossed",
|
||||||
|
"threshold_crossed",
|
||||||
|
"projected_limit_exceeded",
|
||||||
|
"key_created",
|
||||||
|
"spend_tracked",
|
||||||
]
|
]
|
||||||
event_group: Literal["user", "key", "team", "proxy"]
|
event_group: Literal["internal_user", "key", "team", "proxy", "customer"]
|
||||||
event_message: str # human-readable description of event
|
event_message: str # human-readable description of event
|
||||||
|
|
||||||
|
|
||||||
|
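With the added `spend_tracked` event and the `customer` event group, a per-customer spend webhook payload can be built as below; all field values are illustrative.

```python
from litellm.proxy._types import WebhookEvent

event = WebhookEvent(
    event="spend_tracked",
    event_group="customer",
    event_message="Customer spend tracked",
    spend=1.25,                # current spend for this customer
    max_budget=10.0,
    token="hashed-key-value",  # hashed key that made the request
    customer_id="end-user-123",
)
print(event.model_dump_json())
```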
@ -1215,6 +1320,7 @@ class InvitationModel(LiteLLMBase):
|
||||||
updated_at: datetime
|
updated_at: datetime
|
||||||
updated_by: str
|
updated_by: str
|
||||||
|
|
||||||
|
|
||||||
class ConfigFieldInfo(LiteLLMBase):
|
class ConfigFieldInfo(LiteLLMBase):
|
||||||
field_name: str
|
field_name: str
|
||||||
field_value: Any
|
field_value: Any
|
||||||
|
|
|
@ -15,6 +15,7 @@ from litellm.proxy._types import (
|
||||||
LiteLLM_TeamTable,
|
LiteLLM_TeamTable,
|
||||||
LiteLLMRoutes,
|
LiteLLMRoutes,
|
||||||
LiteLLM_OrganizationTable,
|
LiteLLM_OrganizationTable,
|
||||||
|
LitellmUserRoles,
|
||||||
)
|
)
|
||||||
from typing import Optional, Literal, Union
|
from typing import Optional, Literal, Union
|
||||||
from litellm.proxy.utils import PrismaClient
|
from litellm.proxy.utils import PrismaClient
|
||||||
|
@ -133,7 +134,11 @@ def _allowed_routes_check(user_route: str, allowed_routes: list) -> bool:
|
||||||
|
|
||||||
|
|
||||||
def allowed_routes_check(
|
def allowed_routes_check(
|
||||||
user_role: Literal["proxy_admin", "team", "user"],
|
user_role: Literal[
|
||||||
|
LitellmUserRoles.PROXY_ADMIN,
|
||||||
|
LitellmUserRoles.TEAM,
|
||||||
|
LitellmUserRoles.INTERNAL_USER,
|
||||||
|
],
|
||||||
user_route: str,
|
user_route: str,
|
||||||
litellm_proxy_roles: LiteLLM_JWTAuth,
|
litellm_proxy_roles: LiteLLM_JWTAuth,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
|
@ -141,14 +146,14 @@ def allowed_routes_check(
|
||||||
Check if user -> not admin - allowed to access these routes
|
Check if user -> not admin - allowed to access these routes
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if user_role == "proxy_admin":
|
if user_role == LitellmUserRoles.PROXY_ADMIN:
|
||||||
is_allowed = _allowed_routes_check(
|
is_allowed = _allowed_routes_check(
|
||||||
user_route=user_route,
|
user_route=user_route,
|
||||||
allowed_routes=litellm_proxy_roles.admin_allowed_routes,
|
allowed_routes=litellm_proxy_roles.admin_allowed_routes,
|
||||||
)
|
)
|
||||||
return is_allowed
|
return is_allowed
|
||||||
|
|
||||||
elif user_role == "team":
|
elif user_role == LitellmUserRoles.TEAM:
|
||||||
if litellm_proxy_roles.team_allowed_routes is None:
|
if litellm_proxy_roles.team_allowed_routes is None:
|
||||||
"""
|
"""
|
||||||
By default allow a team to call openai + info routes
|
By default allow a team to call openai + info routes
|
||||||
|
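Since the role check now compares against enum members rather than raw strings, callers can pass `LitellmUserRoles` values directly. A sketch; the import path and the `jwt_auth_config` object (a `LiteLLM_JWTAuth` instance) are assumptions about the surrounding wiring.

```python
from litellm.proxy._types import LitellmUserRoles
from litellm.proxy.auth.auth_checks import allowed_routes_check  # assumed module path

# decide whether a team-scoped JWT may hit this route
is_allowed = allowed_routes_check(
    user_role=LitellmUserRoles.TEAM,
    user_route="/chat/completions",
    litellm_proxy_roles=jwt_auth_config,  # LiteLLM_JWTAuth instance (assumption)
)
```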
@ -193,13 +198,27 @@ async def get_end_user_object(
|
||||||
if end_user_id is None:
|
if end_user_id is None:
|
||||||
return None
|
return None
|
||||||
_key = "end_user_id:{}".format(end_user_id)
|
_key = "end_user_id:{}".format(end_user_id)
|
||||||
|
|
||||||
|
def check_in_budget(end_user_obj: LiteLLM_EndUserTable):
|
||||||
|
if end_user_obj.litellm_budget_table is None:
|
||||||
|
return
|
||||||
|
end_user_budget = end_user_obj.litellm_budget_table.max_budget
|
||||||
|
if end_user_budget is not None and end_user_obj.spend > end_user_budget:
|
||||||
|
raise litellm.BudgetExceededError(
|
||||||
|
current_cost=end_user_obj.spend, max_budget=end_user_budget
|
||||||
|
)
|
||||||
|
|
||||||
# check if in cache
|
# check if in cache
|
||||||
cached_user_obj = await user_api_key_cache.async_get_cache(key=_key)
|
cached_user_obj = await user_api_key_cache.async_get_cache(key=_key)
|
||||||
if cached_user_obj is not None:
|
if cached_user_obj is not None:
|
||||||
if isinstance(cached_user_obj, dict):
|
if isinstance(cached_user_obj, dict):
|
||||||
return LiteLLM_EndUserTable(**cached_user_obj)
|
return_obj = LiteLLM_EndUserTable(**cached_user_obj)
|
||||||
|
check_in_budget(end_user_obj=return_obj)
|
||||||
|
return return_obj
|
||||||
elif isinstance(cached_user_obj, LiteLLM_EndUserTable):
|
elif isinstance(cached_user_obj, LiteLLM_EndUserTable):
|
||||||
return cached_user_obj
|
return_obj = cached_user_obj
|
||||||
|
check_in_budget(end_user_obj=return_obj)
|
||||||
|
return return_obj
|
||||||
# else, check db
|
# else, check db
|
||||||
try:
|
try:
|
||||||
response = await prisma_client.db.litellm_endusertable.find_unique(
|
response = await prisma_client.db.litellm_endusertable.find_unique(
|
||||||
|
@ -217,8 +236,12 @@ async def get_end_user_object(
|
||||||
|
|
||||||
_response = LiteLLM_EndUserTable(**response.dict())
|
_response = LiteLLM_EndUserTable(**response.dict())
|
||||||
|
|
||||||
|
check_in_budget(end_user_obj=_response)
|
||||||
|
|
||||||
return _response
|
return _response
|
||||||
except Exception as e: # if end-user not in db
|
except Exception as e: # if end-user not in db
|
||||||
|
if isinstance(e, litellm.BudgetExceededError):
|
||||||
|
raise e
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
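Because both the cache hit and the DB lookup now run `check_in_budget`, callers should expect `litellm.BudgetExceededError` for an over-budget customer instead of a returned object. A sketch of handling that; the argument names follow the signature above, and the surrounding wiring (prisma client, cache) is assumed.

```python
import litellm

async def resolve_customer(end_user_id, prisma_client, user_api_key_cache):
    """Sketch: reject requests from customers whose spend exceeds their linked budget."""
    try:
        return await get_end_user_object(
            end_user_id=end_user_id,
            prisma_client=prisma_client,
            user_api_key_cache=user_api_key_cache,
        )
    except litellm.BudgetExceededError:
        # raised by check_in_budget with the customer's current spend and max budget
        raise
```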
|
@ -429,6 +429,19 @@ def run_server(
|
||||||
|
|
||||||
proxy_config = ProxyConfig()
|
proxy_config = ProxyConfig()
|
||||||
_config = asyncio.run(proxy_config.get_config(config_file_path=config))
|
_config = asyncio.run(proxy_config.get_config(config_file_path=config))
|
||||||
|
### LITELLM SETTINGS ###
|
||||||
|
litellm_settings = _config.get("litellm_settings", None)
|
||||||
|
if (
|
||||||
|
litellm_settings is not None
|
||||||
|
and "json_logs" in litellm_settings
|
||||||
|
and litellm_settings["json_logs"] == True
|
||||||
|
):
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
litellm.json_logs = True
|
||||||
|
|
||||||
|
litellm._turn_on_json()
|
||||||
|
### GENERAL SETTINGS ###
|
||||||
general_settings = _config.get("general_settings", {})
|
general_settings = _config.get("general_settings", {})
|
||||||
if general_settings is None:
|
if general_settings is None:
|
||||||
general_settings = {}
|
general_settings = {}
|
||||||
|
|
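The new startup block is the config-file route to JSON logging; the programmatic equivalent (both calls are the ones invoked above) is:

```python
import litellm

# same effect as `litellm_settings: { json_logs: true }` in the proxy config
litellm.json_logs = True
litellm._turn_on_json()
```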
File diff suppressed because it is too large
|
@ -15,6 +15,7 @@ from litellm.proxy._types import (
|
||||||
WebhookEvent,
|
WebhookEvent,
|
||||||
AlertType,
|
AlertType,
|
||||||
ResetTeamBudgetRequest,
|
ResetTeamBudgetRequest,
|
||||||
|
LitellmUserRoles,
|
||||||
)
|
)
|
||||||
from litellm.caching import DualCache, RedisCache
|
from litellm.caching import DualCache, RedisCache
|
||||||
from litellm.router import Deployment, ModelInfo, LiteLLM_Params
|
from litellm.router import Deployment, ModelInfo, LiteLLM_Params
|
||||||
|
@ -2637,7 +2638,7 @@ def _is_user_proxy_admin(user_id_information: Optional[list]):
|
||||||
_user = user_id_information[0]
|
_user = user_id_information[0]
|
||||||
if (
|
if (
|
||||||
_user.get("user_role", None) is not None
|
_user.get("user_role", None) is not None
|
||||||
and _user.get("user_role") == "proxy_admin"
|
and _user.get("user_role") == LitellmUserRoles.PROXY_ADMIN.value
|
||||||
):
|
):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -2650,7 +2651,7 @@ def _is_user_proxy_admin(user_id_information: Optional[list]):
|
||||||
|
|
||||||
if (
|
if (
|
||||||
_user.get("user_role", None) is not None
|
_user.get("user_role", None) is not None
|
||||||
and _user.get("user_role") == "proxy_admin"
|
and _user.get("user_role") == LitellmUserRoles.PROXY_ADMIN.value
|
||||||
):
|
):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
|
@ -103,7 +103,9 @@ class Router:
|
||||||
allowed_fails: Optional[
|
allowed_fails: Optional[
|
||||||
int
|
int
|
||||||
] = None, # Number of times a deployment can fail before being added to cooldown
|
] = None, # Number of times a deployment can fail before being added to cooldown
|
||||||
cooldown_time: float = 1, # (seconds) time to cooldown a deployment after failure
|
cooldown_time: Optional[
|
||||||
|
float
|
||||||
|
] = None, # (seconds) time to cooldown a deployment after failure
|
||||||
routing_strategy: Literal[
|
routing_strategy: Literal[
|
||||||
"simple-shuffle",
|
"simple-shuffle",
|
||||||
"least-busy",
|
"least-busy",
|
||||||
|
@ -248,7 +250,7 @@ class Router:
|
||||||
) # initialize an empty list - to allow _add_deployment and delete_deployment to work
|
) # initialize an empty list - to allow _add_deployment and delete_deployment to work
|
||||||
|
|
||||||
self.allowed_fails = allowed_fails or litellm.allowed_fails
|
self.allowed_fails = allowed_fails or litellm.allowed_fails
|
||||||
self.cooldown_time = cooldown_time or 1
|
self.cooldown_time = cooldown_time or 60
|
||||||
self.failed_calls = (
|
self.failed_calls = (
|
||||||
InMemoryCache()
|
InMemoryCache()
|
||||||
) # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown
|
) # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown
|
||||||
|
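Note the default change: with `cooldown_time` left unset, the fallback is now 60 seconds instead of 1. A sketch of pinning it explicitly when the old near-immediate recovery is wanted:

```python
from litellm import Router

router = Router(
    model_list=[
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}},
    ],
    allowed_fails=3,  # failures within a minute before a deployment is cooled down
    cooldown_time=1,  # seconds; omit to get the new 60s default
)
```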
@ -356,7 +358,8 @@ class Router:
|
||||||
raise ValueError(f"Item '{fallback_dict}' is not a dictionary.")
|
raise ValueError(f"Item '{fallback_dict}' is not a dictionary.")
|
||||||
if len(fallback_dict) != 1:
|
if len(fallback_dict) != 1:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys.")
|
f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys."
|
||||||
|
)
|
||||||
|
|
||||||
def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
|
def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
|
||||||
if routing_strategy == "least-busy":
|
if routing_strategy == "least-busy":
|
||||||
|
@ -662,12 +665,40 @@ class Router:
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
async def abatch_completion(
|
async def abatch_completion(
|
||||||
self, models: List[str], messages: List[Dict[str, str]], **kwargs
|
self,
|
||||||
|
models: List[str],
|
||||||
|
messages: Union[List[Dict[str, str]], List[List[Dict[str, str]]]],
|
||||||
|
**kwargs,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Async Batch Completion - Batch Process 1 request to multiple model_group on litellm.Router
|
Async Batch Completion. Used for 2 scenarios:
|
||||||
Use this for sending the same request to N models
|
1. Batch Process 1 request to N models on litellm.Router. Pass messages as List[Dict[str, str]] to use this
|
||||||
|
2. Batch Process N requests to M models on litellm.Router. Pass messages as List[List[Dict[str, str]]] to use this
|
||||||
|
|
||||||
|
Example Request for 1 request to N models:
|
||||||
|
```
|
||||||
|
response = await router.abatch_completion(
|
||||||
|
models=["gpt-3.5-turbo", "groq-llama"],
|
||||||
|
messages=[
|
||||||
|
{"role": "user", "content": "is litellm becoming a better product ?"}
|
||||||
|
],
|
||||||
|
max_tokens=15,
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
Example Request for N requests to M models:
|
||||||
|
```
|
||||||
|
response = await router.abatch_completion(
|
||||||
|
models=["gpt-3.5-turbo", "groq-llama"],
|
||||||
|
messages=[
|
||||||
|
[{"role": "user", "content": "is litellm becoming a better product ?"}],
|
||||||
|
[{"role": "user", "content": "who is this"}],
|
||||||
|
],
|
||||||
|
)
|
||||||
|
```
|
||||||
"""
|
"""
|
||||||
|
############## Helpers for async completion ##################
|
||||||
|
|
||||||
async def _async_completion_no_exceptions(
|
async def _async_completion_no_exceptions(
|
||||||
model: str, messages: List[Dict[str, str]], **kwargs
|
model: str, messages: List[Dict[str, str]], **kwargs
|
||||||
|
@ -680,17 +711,50 @@ class Router:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return e
|
return e
|
||||||
|
|
||||||
_tasks = []
|
async def _async_completion_no_exceptions_return_idx(
|
||||||
for model in models:
|
model: str,
|
||||||
# add each task but if the task fails
|
messages: List[Dict[str, str]],
|
||||||
_tasks.append(
|
idx: int, # index of message this response corresponds to
|
||||||
_async_completion_no_exceptions(
|
**kwargs,
|
||||||
model=model, messages=messages, **kwargs
|
):
|
||||||
|
"""
|
||||||
|
Wrapper around self.async_completion that catches exceptions and returns them as a result
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
return (
|
||||||
|
await self.acompletion(model=model, messages=messages, **kwargs),
|
||||||
|
idx,
|
||||||
)
|
)
|
||||||
)
|
except Exception as e:
|
||||||
|
return e, idx
|
||||||
|
|
||||||
response = await asyncio.gather(*_tasks)
|
############## Helpers for async completion ##################
|
||||||
return response
|
|
||||||
|
if isinstance(messages, list) and all(isinstance(m, dict) for m in messages):
|
||||||
|
_tasks = []
|
||||||
|
for model in models:
|
||||||
|
# add each task but if the task fails
|
||||||
|
_tasks.append(_async_completion_no_exceptions(model=model, messages=messages, **kwargs)) # type: ignore
|
||||||
|
response = await asyncio.gather(*_tasks)
|
||||||
|
return response
|
||||||
|
elif isinstance(messages, list) and all(isinstance(m, list) for m in messages):
|
||||||
|
_tasks = []
|
||||||
|
for idx, message in enumerate(messages):
|
||||||
|
for model in models:
|
||||||
|
# Request Number X, Model Number Y
|
||||||
|
_tasks.append(
|
||||||
|
_async_completion_no_exceptions_return_idx(
|
||||||
|
model=model, idx=idx, messages=message, **kwargs # type: ignore
|
||||||
|
)
|
||||||
|
)
|
||||||
|
responses = await asyncio.gather(*_tasks)
|
||||||
|
final_responses: List[List[Any]] = [[] for _ in range(len(messages))]
|
||||||
|
for response in responses:
|
||||||
|
if isinstance(response, tuple):
|
||||||
|
final_responses[response[1]].append(response[0])
|
||||||
|
else:
|
||||||
|
final_responses[0].append(response)
|
||||||
|
return final_responses
|
||||||
|
|
||||||
async def abatch_completion_one_model_multiple_requests(
|
async def abatch_completion_one_model_multiple_requests(
|
||||||
self, model: str, messages: List[List[Dict[str, str]]], **kwargs
|
self, model: str, messages: List[List[Dict[str, str]]], **kwargs
|
||||||
|
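For the new N-requests x M-models form, the return value is one list per input message, each holding a result per model; failures are returned as exception objects rather than raised. A runnable sketch, with the Groq model id being an assumption:

```python
import asyncio
from litellm import Router

router = Router(
    model_list=[
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}},
        {"model_name": "groq-llama", "litellm_params": {"model": "groq/llama3-8b-8192"}},  # assumed model id
    ]
)

async def main():
    responses = await router.abatch_completion(
        models=["gpt-3.5-turbo", "groq-llama"],
        messages=[
            [{"role": "user", "content": "is litellm becoming a better product ?"}],
            [{"role": "user", "content": "who is this"}],
        ],
        max_tokens=15,
    )
    # responses[i] -> results for messages[i], one entry per model
    for idx, per_request in enumerate(responses):
        for result in per_request:
            if isinstance(result, Exception):
                print(f"request {idx} failed on one model: {result}")
            else:
                print(f"request {idx}: {result.choices[0].message.content}")

asyncio.run(main())
```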
@ -737,6 +801,101 @@ class Router:
|
||||||
response = await asyncio.gather(*_tasks)
|
response = await asyncio.gather(*_tasks)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
# fmt: off
|
||||||
|
|
||||||
|
@overload
|
||||||
|
async def abatch_completion_fastest_response(
|
||||||
|
self, model: str, messages: List[Dict[str, str]], stream: Literal[True], **kwargs
|
||||||
|
) -> CustomStreamWrapper:
|
||||||
|
...
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@overload
|
||||||
|
async def abatch_completion_fastest_response(
|
||||||
|
self, model: str, messages: List[Dict[str, str]], stream: Literal[False] = False, **kwargs
|
||||||
|
) -> ModelResponse:
|
||||||
|
...
|
||||||
|
|
||||||
|
# fmt: on
|
||||||
|
|
||||||
|
async def abatch_completion_fastest_response(
|
||||||
|
self,
|
||||||
|
model: str,
|
||||||
|
messages: List[Dict[str, str]],
|
||||||
|
stream: bool = False,
|
||||||
|
**kwargs,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
model - List of comma-separated model names. E.g. model="gpt-4, gpt-3.5-turbo"
|
||||||
|
|
||||||
|
Returns fastest response from list of model names. OpenAI-compatible endpoint.
|
||||||
|
"""
|
||||||
|
models = [m.strip() for m in model.split(",")]
|
||||||
|
|
||||||
|
async def _async_completion_no_exceptions(
|
||||||
|
model: str, messages: List[Dict[str, str]], stream: bool, **kwargs: Any
|
||||||
|
) -> Union[ModelResponse, CustomStreamWrapper, Exception]:
|
||||||
|
"""
|
||||||
|
Wrapper around self.acompletion that catches exceptions and returns them as a result
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
return await self.acompletion(model=model, messages=messages, stream=stream, **kwargs) # type: ignore
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
verbose_router_logger.debug(
|
||||||
|
"Received 'task.cancel'. Cancelling call w/ model={}.".format(model)
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
return e
|
||||||
|
|
||||||
|
pending_tasks = [] # type: ignore
|
||||||
|
|
||||||
|
async def check_response(task: asyncio.Task):
|
||||||
|
nonlocal pending_tasks
|
||||||
|
try:
|
||||||
|
result = await task
|
||||||
|
if isinstance(result, (ModelResponse, CustomStreamWrapper)):
|
||||||
|
verbose_router_logger.debug(
|
||||||
|
"Received successful response. Cancelling other LLM API calls."
|
||||||
|
)
|
||||||
|
# If a desired response is received, cancel all other pending tasks
|
||||||
|
for t in pending_tasks:
|
||||||
|
t.cancel()
|
||||||
|
return result
|
||||||
|
except Exception:
|
||||||
|
# Ignore exceptions, let the loop handle them
|
||||||
|
pass
|
||||||
|
finally:
|
||||||
|
# Remove the task from pending tasks if it finishes
|
||||||
|
try:
|
||||||
|
pending_tasks.remove(task)
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
for model in models:
|
||||||
|
task = asyncio.create_task(
|
||||||
|
_async_completion_no_exceptions(
|
||||||
|
model=model, messages=messages, stream=stream, **kwargs
|
||||||
|
)
|
||||||
|
)
|
||||||
|
pending_tasks.append(task)
|
||||||
|
|
||||||
|
# Await the first task to complete successfully
|
||||||
|
while pending_tasks:
|
||||||
|
done, pending_tasks = await asyncio.wait( # type: ignore
|
||||||
|
pending_tasks, return_when=asyncio.FIRST_COMPLETED
|
||||||
|
)
|
||||||
|
for completed_task in done:
|
||||||
|
result = await check_response(completed_task)
|
||||||
|
if result is not None:
|
||||||
|
# Return the first successful result
|
||||||
|
result._hidden_params["fastest_response_batch_completion"] = True
|
||||||
|
return result
|
||||||
|
|
||||||
|
# If we exit the loop without returning, all tasks failed
|
||||||
|
raise Exception("All tasks failed")
|
||||||
|
|
||||||
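A Router-level sketch of the fastest-response helper: slower in-flight calls are cancelled once one model answers, and the winner is flagged in `_hidden_params`. It assumes `router` is an initialized `litellm.Router` with both model groups configured (see the earlier sketch).

```python
import asyncio

async def main():
    response = await router.abatch_completion_fastest_response(
        model="gpt-3.5-turbo, groq-llama",  # comma-separated model group names
        messages=[{"role": "user", "content": "ping"}],
    )
    print(response.choices[0].message.content)
    # set by the router on the winning (non-streaming) response
    print(response._hidden_params.get("fastest_response_batch_completion"))

asyncio.run(main())
```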
def image_generation(self, prompt: str, model: str, **kwargs):
|
def image_generation(self, prompt: str, model: str, **kwargs):
|
||||||
try:
|
try:
|
||||||
kwargs["model"] = model
|
kwargs["model"] = model
|
||||||
|
@ -1045,6 +1204,84 @@ class Router:
|
||||||
self.fail_calls[model_name] += 1
|
self.fail_calls[model_name] += 1
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
async def aspeech(self, model: str, input: str, voice: str, **kwargs):
|
||||||
|
"""
|
||||||
|
Example Usage:
|
||||||
|
|
||||||
|
```
|
||||||
|
from litellm import Router
|
||||||
|
client = Router(model_list = [
|
||||||
|
{
|
||||||
|
"model_name": "tts",
|
||||||
|
"litellm_params": {
|
||||||
|
"model": "tts-1",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
])
|
||||||
|
|
||||||
|
async with client.aspeech(
|
||||||
|
model="tts",
|
||||||
|
voice="alloy",
|
||||||
|
input="the quick brown fox jumped over the lazy dogs",
|
||||||
|
api_base=None,
|
||||||
|
api_key=None,
|
||||||
|
organization=None,
|
||||||
|
project=None,
|
||||||
|
max_retries=1,
|
||||||
|
timeout=600,
|
||||||
|
client=None,
|
||||||
|
optional_params={},
|
||||||
|
) as response:
|
||||||
|
response.stream_to_file(speech_file_path)
|
||||||
|
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
kwargs["input"] = input
|
||||||
|
kwargs["voice"] = voice
|
||||||
|
|
||||||
|
deployment = await self.async_get_available_deployment(
|
||||||
|
model=model,
|
||||||
|
messages=[{"role": "user", "content": "prompt"}],
|
||||||
|
specific_deployment=kwargs.pop("specific_deployment", None),
|
||||||
|
)
|
||||||
|
kwargs.setdefault("metadata", {}).update(
|
||||||
|
{
|
||||||
|
"deployment": deployment["litellm_params"]["model"],
|
||||||
|
"model_info": deployment.get("model_info", {}),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
kwargs["model_info"] = deployment.get("model_info", {})
|
||||||
|
data = deployment["litellm_params"].copy()
|
||||||
|
model_name = data["model"]
|
||||||
|
for k, v in self.default_litellm_params.items():
|
||||||
|
if (
|
||||||
|
k not in kwargs
|
||||||
|
): # prioritize model-specific params > default router params
|
||||||
|
kwargs[k] = v
|
||||||
|
elif k == "metadata":
|
||||||
|
kwargs[k].update(v)
|
||||||
|
|
||||||
|
potential_model_client = self._get_client(
|
||||||
|
deployment=deployment, kwargs=kwargs, client_type="async"
|
||||||
|
)
|
||||||
|
# check if provided keys == client keys #
|
||||||
|
dynamic_api_key = kwargs.get("api_key", None)
|
||||||
|
if (
|
||||||
|
dynamic_api_key is not None
|
||||||
|
and potential_model_client is not None
|
||||||
|
and dynamic_api_key != potential_model_client.api_key
|
||||||
|
):
|
||||||
|
model_client = None
|
||||||
|
else:
|
||||||
|
model_client = potential_model_client
|
||||||
|
|
||||||
|
response = await litellm.aspeech(**data, **kwargs)
|
||||||
|
|
||||||
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
raise e
|
||||||
|
|
||||||
async def amoderation(self, model: str, input: str, **kwargs):
|
async def amoderation(self, model: str, input: str, **kwargs):
|
||||||
try:
|
try:
|
||||||
kwargs["model"] = model
|
kwargs["model"] = model
|
||||||
|
@@ -1693,7 +1930,8 @@ class Router:
                )
                await asyncio.sleep(_timeout)
            try:
                cooldown_deployments = await self._async_get_cooldown_deployments()
                original_exception.message += f"\nNumber Retries = {current_attempt + 1}, Max Retries={num_retries}\nCooldown Deployments={cooldown_deployments}"
            except:
                pass
            raise original_exception
@@ -1986,7 +2224,7 @@ class Router:
                )
            )

            if _time_to_cooldown is None or _time_to_cooldown < 0:
                # if the response headers did not read it -> set to default cooldown time
                _time_to_cooldown = self.cooldown_time
@@ -2082,6 +2320,9 @@ class Router:
        elif exception_status == 408:
            return True

        elif exception_status == 404:
            return True

        else:
            # Do NOT cool down all other 4XX Errors
            return False
@@ -2107,6 +2348,7 @@ class Router:

        the exception is not one that should be immediately retried (e.g. 401)
        """
        args = locals()
        if deployment is None:
            return
@@ -2139,7 +2381,6 @@ class Router:
            )
            exception_status = 500
        _should_retry = litellm._should_retry(status_code=exception_status)

        if updated_fails > self.allowed_fails or _should_retry == False:
            # get the current cooldown list for that minute
            cooldown_key = f"{current_minute}:cooldown_models"  # group cooldown models by minute to reduce number of redis calls
@@ -2453,8 +2694,17 @@ class Router:

            if "azure" in model_name:
                if api_base is None or not isinstance(api_base, str):
                    filtered_litellm_params = {
                        k: v
                        for k, v in model["litellm_params"].items()
                        if k != "api_key"
                    }
                    _filtered_model = {
                        "model_name": model["model_name"],
                        "litellm_params": filtered_litellm_params,
                    }
                    raise ValueError(
                        f"api_base is required for Azure OpenAI. Set it on your config. Model - {_filtered_model}"
                    )
                azure_ad_token = litellm_params.get("azure_ad_token")
                if azure_ad_token is not None:
@@ -3076,6 +3326,8 @@ class Router:
            supported_openai_params = litellm.get_supported_openai_params(
                model=model, custom_llm_provider=llm_provider
            )
            if supported_openai_params is None:
                supported_openai_params = []
            model_info = ModelMapInfo(
                max_tokens=None,
                max_input_tokens=None,
@@ -3546,7 +3798,6 @@ class Router:
        ## get healthy deployments
        ### get all deployments
        healthy_deployments = [m for m in self.model_list if m["model_name"] == model]

        if len(healthy_deployments) == 0:
            # check if the user sent in a deployment name instead
            healthy_deployments = [
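The Azure `api_base` check above builds a `_filtered_model` dict so the raised error never echoes the deployment's `api_key`. A minimal sketch of that redaction pattern, assuming a plain model-config dict (the helper name is illustrative, not a litellm function):

```python
def redact_model_for_error(model: dict) -> dict:
    """Copy a model entry, dropping sensitive litellm_params before it is logged or raised."""
    filtered_litellm_params = {
        k: v for k, v in model.get("litellm_params", {}).items() if k != "api_key"
    }
    return {
        "model_name": model.get("model_name"),
        "litellm_params": filtered_litellm_params,
    }


example = {
    "model_name": "azure-gpt",
    "litellm_params": {"model": "azure/chatgpt-v-2", "api_key": "sk-secret"},
}
print(redact_model_for_error(example))  # api_key never appears in the error text
```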
litellm/tests/openai_batch_completions.jsonl  (new file, 2 lines)
@@ -0,0 +1,2 @@
{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-3.5-turbo-0125", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 10}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-3.5-turbo-0125", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 10}}
@@ -14,7 +14,7 @@ sys.path.insert(
)  # Adds the parent directory to the system path
import pytest, logging, asyncio
import litellm, asyncio
from litellm.proxy.proxy_server import add_new_model, update_model, LitellmUserRoles
from litellm._logging import verbose_proxy_logger
from litellm.proxy.utils import PrismaClient, ProxyLogging

@@ -90,7 +90,9 @@ async def test_add_new_model(prisma_client):
            ),
        ),
        user_api_key_dict=UserAPIKeyAuth(
            user_role=LitellmUserRoles.PROXY_ADMIN.value,
            api_key="sk-1234",
            user_id="1234",
        ),
    )

@@ -137,7 +139,9 @@ async def test_add_update_model(prisma_client):
            ),
        ),
        user_api_key_dict=UserAPIKeyAuth(
            user_role=LitellmUserRoles.PROXY_ADMIN.value,
            api_key="sk-1234",
            user_id="1234",
        ),
    )

@@ -166,7 +170,9 @@ async def test_add_update_model(prisma_client):
            ),
        ),
        user_api_key_dict=UserAPIKeyAuth(
            user_role=LitellmUserRoles.PROXY_ADMIN.value,
            api_key="sk-1234",
            user_id="1234",
        ),
    )
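These hunks swap the raw `"proxy_admin"` string for the `LitellmUserRoles` enum imported from the proxy server. A minimal sketch of why a string-backed enum keeps existing string comparisons working, assuming a simplified two-member enum (the real one in litellm has more roles):

```python
from enum import Enum


class LitellmUserRoles(str, Enum):
    """Illustrative stand-in for litellm's user-role enum."""
    PROXY_ADMIN = "proxy_admin"
    INTERNAL_USER = "internal_user"


# str-backed enum members compare equal to their literal values,
# so call sites that still pass or check plain strings keep working,
# while typos become attribute errors instead of silent mismatches.
assert LitellmUserRoles.PROXY_ADMIN == "proxy_admin"
assert LitellmUserRoles.PROXY_ADMIN.value == "proxy_admin"
```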
@@ -499,6 +499,36 @@ async def test_webhook_alerting(alerting_type):
        mock_send_alert.assert_awaited_once()


# @pytest.mark.asyncio
# async def test_webhook_customer_spend_event():
#     """
#     Test if customer spend is working as expected
#     """
#     slack_alerting = SlackAlerting(alerting=["webhook"])

#     with patch.object(
#         slack_alerting, "send_webhook_alert", new=AsyncMock()
#     ) as mock_send_alert:
#         user_info = {
#             "token": "50e55ca5bfbd0759697538e8d23c0cd5031f52d9e19e176d7233b20c7c4d3403",
#             "spend": 1,
#             "max_budget": 0,
#             "user_id": "ishaan@berri.ai",
#             "user_email": "ishaan@berri.ai",
#             "key_alias": "my-test-key",
#             "projected_exceeded_date": "10/20/2024",
#             "projected_spend": 200,
#         }

#         user_info = CallInfo(**user_info)
#         for _ in range(50):
#             await slack_alerting.budget_alerts(
#                 type=alerting_type,
#                 user_info=user_info,
#             )
#         mock_send_alert.assert_awaited_once()


@pytest.mark.parametrize(
    "model, api_base, llm_provider, vertex_project, vertex_location",
    [
litellm/tests/test_audio_speech.py  (new file, 96 lines)
@@ -0,0 +1,96 @@
# What is this?
## unit tests for openai tts endpoint

import sys, os, asyncio, time, random, uuid
import traceback
from dotenv import load_dotenv

load_dotenv()
import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest
import litellm, openai
from pathlib import Path


@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_audio_speech_litellm(sync_mode):
    speech_file_path = Path(__file__).parent / "speech.mp3"

    if sync_mode:
        response = litellm.speech(
            model="openai/tts-1",
            voice="alloy",
            input="the quick brown fox jumped over the lazy dogs",
            api_base=None,
            api_key=None,
            organization=None,
            project=None,
            max_retries=1,
            timeout=600,
            client=None,
            optional_params={},
        )

        from litellm.llms.openai import HttpxBinaryResponseContent

        assert isinstance(response, HttpxBinaryResponseContent)
    else:
        response = await litellm.aspeech(
            model="openai/tts-1",
            voice="alloy",
            input="the quick brown fox jumped over the lazy dogs",
            api_base=None,
            api_key=None,
            organization=None,
            project=None,
            max_retries=1,
            timeout=600,
            client=None,
            optional_params={},
        )

        from litellm.llms.openai import HttpxBinaryResponseContent

        assert isinstance(response, HttpxBinaryResponseContent)


@pytest.mark.parametrize("mode", ["iterator"])  # "file",
@pytest.mark.asyncio
async def test_audio_speech_router(mode):
    speech_file_path = Path(__file__).parent / "speech.mp3"

    from litellm import Router

    client = Router(
        model_list=[
            {
                "model_name": "tts",
                "litellm_params": {
                    "model": "openai/tts-1",
                },
            },
        ]
    )

    response = await client.aspeech(
        model="tts",
        voice="alloy",
        input="the quick brown fox jumped over the lazy dogs",
        api_base=None,
        api_key=None,
        organization=None,
        project=None,
        max_retries=1,
        timeout=600,
        client=None,
        optional_params={},
    )

    from litellm.llms.openai import HttpxBinaryResponseContent

    assert isinstance(response, HttpxBinaryResponseContent)
litellm/tests/test_auth_checks.py  (new file, 62 lines)
@@ -0,0 +1,62 @@
# What is this?
## Tests if 'get_end_user_object' works as expected

import sys, os, asyncio, time, random, uuid
import traceback
from dotenv import load_dotenv

load_dotenv()
import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest, litellm
from litellm.proxy.auth.auth_checks import get_end_user_object
from litellm.caching import DualCache
from litellm.proxy._types import LiteLLM_EndUserTable, LiteLLM_BudgetTable
from litellm.proxy.utils import PrismaClient


@pytest.mark.parametrize("customer_spend, customer_budget", [(0, 10), (10, 0)])
@pytest.mark.asyncio
async def test_get_end_user_object(customer_spend, customer_budget):
    """
    Scenario 1: normal
    Scenario 2: user over budget
    """
    end_user_id = "my-test-customer"
    _budget = LiteLLM_BudgetTable(max_budget=customer_budget)
    end_user_obj = LiteLLM_EndUserTable(
        user_id=end_user_id,
        spend=customer_spend,
        litellm_budget_table=_budget,
        blocked=False,
    )
    _cache = DualCache()
    _key = "end_user_id:{}".format(end_user_id)
    _cache.set_cache(key=_key, value=end_user_obj)
    try:
        await get_end_user_object(
            end_user_id=end_user_id,
            prisma_client="RANDOM VALUE",  # type: ignore
            user_api_key_cache=_cache,
        )
        if customer_spend > customer_budget:
            pytest.fail(
                "Expected call to fail. Customer Spend={}, Customer Budget={}".format(
                    customer_spend, customer_budget
                )
            )
    except Exception as e:
        if (
            isinstance(e, litellm.BudgetExceededError)
            and customer_spend > customer_budget
        ):
            pass
        else:
            pytest.fail(
                "Expected call to work. Customer Spend={}, Customer Budget={}, Error={}".format(
                    customer_spend, customer_budget, str(e)
                )
            )
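test_get_end_user_object drives the customer budget check: a cached end user whose spend exceeds their budget should fail auth. A rough sketch of that decision, assuming `litellm.BudgetExceededError` accepts the current cost and the budget (the helper is illustrative, not the actual auth_checks implementation):

```python
from typing import Optional

import litellm


def check_customer_budget(spend: float, max_budget: Optional[float]) -> None:
    """Raise when an end user's tracked spend has crossed their configured budget (illustrative)."""
    if max_budget is not None and spend > max_budget:
        raise litellm.BudgetExceededError(current_cost=spend, max_budget=max_budget)


check_customer_budget(spend=0, max_budget=10)    # scenario 1: under budget, no error
# check_customer_budget(spend=10, max_budget=0)  # scenario 2: would raise BudgetExceededError
```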
@@ -7,7 +7,7 @@ import os, io

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion, completion_cost, Timeout

@@ -38,7 +38,7 @@ def reset_callbacks():
@pytest.mark.skip(reason="Local test")
def test_response_model_none():
    """
    Addresses: https://github.com/BerriAI/litellm/issues/2972
    """
    x = completion(
        model="mymodel",

@@ -1397,6 +1397,81 @@ def test_hf_classifier_task():
        pytest.fail(f"Error occurred: {str(e)}")


def test_ollama_image():
    """
    Test that datauri prefixes are removed, JPEG/PNG images are passed
    through, and other image formats are converted to JPEG.  Non-image
    data is untouched.
    """

    import io, base64
    from PIL import Image

    def mock_post(url, **kwargs):
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.headers = {"Content-Type": "application/json"}
        mock_response.json.return_value = {
            # return the image in the response so that it can be tested
            # against the original
            "response": kwargs["json"]["images"]
        }
        return mock_response

    def make_b64image(format):
        image = Image.new(mode="RGB", size=(1, 1))
        image_buffer = io.BytesIO()
        image.save(image_buffer, format)
        return base64.b64encode(image_buffer.getvalue()).decode("utf-8")

    jpeg_image = make_b64image("JPEG")
    webp_image = make_b64image("WEBP")
    png_image = make_b64image("PNG")

    base64_data = base64.b64encode(b"some random data")
    datauri_base64_data = f"data:text/plain;base64,{base64_data}"

    tests = [
        # input                                     expected
        [jpeg_image, jpeg_image],
        [webp_image, None],
        [png_image, png_image],
        [f"data:image/jpeg;base64,{jpeg_image}", jpeg_image],
        [f"data:image/webp;base64,{webp_image}", None],
        [f"data:image/png;base64,{png_image}", png_image],
        [datauri_base64_data, datauri_base64_data],
    ]

    for test in tests:
        try:
            with patch("requests.post", side_effect=mock_post):
                response = completion(
                    model="ollama/llava",
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": "Whats in this image?"},
                                {
                                    "type": "image_url",
                                    "image_url": {"url": test[0]},
                                },
                            ],
                        }
                    ],
                )
                if not test[1]:
                    # the conversion process may not always generate the same image,
                    # so just check for a JPEG image when a conversion was done.
                    image_data = response["choices"][0]["message"]["content"][0]
                    image = Image.open(io.BytesIO(base64.b64decode(image_data)))
                    assert image.format == "JPEG"
                else:
                    assert response["choices"][0]["message"]["content"][0] == test[1]
        except Exception as e:
            pytest.fail(f"Error occurred: {e}")


########################### End of Hugging Face Tests ##############################################
# def test_completion_hf_api():
#     # failing on circle-ci commenting out
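test_ollama_image spells out the expected normalization: strip `data:image/...` prefixes, pass JPEG and PNG through, convert other image formats to JPEG, and leave non-image payloads untouched. A sketch of that logic under those assumptions (the function name is illustrative; it is not litellm's internal helper):

```python
import base64
import io

from PIL import Image


def normalize_ollama_image(data: str) -> str:
    """Illustrative sketch of the image handling the test above asserts."""
    if data.startswith("data:image/"):
        data = data.split(",", 1)[1]  # drop the data-URI prefix
    try:
        image = Image.open(io.BytesIO(base64.b64decode(data)))
    except Exception:
        return data  # not an image -> pass through untouched
    if image.format in ("JPEG", "PNG"):
        return data  # already a supported format
    buffer = io.BytesIO()
    image.convert("RGB").save(buffer, "JPEG")  # e.g. WEBP -> JPEG
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
```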
@@ -13,7 +13,7 @@ sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the, system path
import pytest, litellm
from pydantic import BaseModel, ConfigDict
from litellm.proxy.proxy_server import ProxyConfig
from litellm.proxy.utils import encrypt_value, ProxyLogging, DualCache
from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo

@@ -26,8 +26,7 @@ class DBModel(BaseModel):
    model_info: dict
    litellm_params: dict

    config_dict: ConfigDict = ConfigDict(protected_namespaces=())


@pytest.mark.asyncio
@@ -61,6 +61,7 @@ from litellm.proxy.proxy_server import (
    audio_transcriptions,
    moderations,
    model_list,
    LitellmUserRoles,
)
from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend
from litellm._logging import verbose_proxy_logger

@@ -137,7 +138,9 @@ async def test_new_user_response(prisma_client):
            team_id=_team_id,
        ),
        user_api_key_dict=UserAPIKeyAuth(
            user_role=LitellmUserRoles.PROXY_ADMIN,
            api_key="sk-1234",
            user_id="1234",
        ),
    )

@@ -206,7 +209,7 @@ def test_generate_and_call_with_valid_key(prisma_client, api_route):
        await litellm.proxy.proxy_server.prisma_client.connect()
        from litellm.proxy.proxy_server import user_api_key_cache

        request = NewUserRequest(user_role=LitellmUserRoles.INTERNAL_USER)
        key = await new_user(request)
        print(key)
        user_id = key.user_id

@@ -215,7 +218,7 @@ def test_generate_and_call_with_valid_key(prisma_client, api_route):
        new_user_info = await user_info(user_id=user_id)
        new_user_info = new_user_info["user_info"]
        print("new_user_info=", new_user_info)
        assert new_user_info.user_role == LitellmUserRoles.INTERNAL_USER
        assert new_user_info.user_id == user_id

        generated_key = key.key

@@ -363,7 +366,8 @@ async def test_call_with_valid_model_using_all_models(prisma_client):
        )

        new_team_response = await new_team(
            data=team_request,
            user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
        )
        print("new_team_response", new_team_response)
        created_team_id = new_team_response["team_id"]

@@ -559,7 +563,7 @@ def test_call_with_end_user_over_budget(prisma_client):
        asyncio.run(test())
    except Exception as e:
        error_detail = e.message
        assert "Budget has been exceeded! Current" in error_detail
        print(vars(e))

@@ -922,7 +926,7 @@ def test_delete_key(prisma_client):
            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print(f"result: {result}")
            result.user_role = LitellmUserRoles.PROXY_ADMIN
            # delete the key
            result_delete_key = await delete_key_fn(
                data=delete_key_request, user_api_key_dict=result

@@ -972,7 +976,7 @@ def test_delete_key_auth(prisma_client):
            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print(f"result: {result}")
            result.user_role = LitellmUserRoles.PROXY_ADMIN

            result_delete_key = await delete_key_fn(
                data=delete_key_request, user_api_key_dict=result

@@ -1044,7 +1048,7 @@ def test_generate_and_call_key_info(prisma_client):
            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print(f"result: {result}")
            result.user_role = LitellmUserRoles.PROXY_ADMIN

            result_delete_key = await delete_key_fn(
                data=delete_key_request, user_api_key_dict=result

@@ -1078,7 +1082,9 @@ def test_generate_and_update_key(prisma_client):
                team_id=_team_1,
            ),
            user_api_key_dict=UserAPIKeyAuth(
                user_role=LitellmUserRoles.PROXY_ADMIN,
                api_key="sk-1234",
                user_id="1234",
            ),
        )

@@ -1088,7 +1094,9 @@ def test_generate_and_update_key(prisma_client):
                team_id=_team_2,
            ),
            user_api_key_dict=UserAPIKeyAuth(
                user_role=LitellmUserRoles.PROXY_ADMIN,
                api_key="sk-1234",
                user_id="1234",
            ),
        )

@@ -1158,7 +1166,7 @@ def test_generate_and_update_key(prisma_client):
            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print(f"result: {result}")
            result.user_role = LitellmUserRoles.PROXY_ADMIN

            result_delete_key = await delete_key_fn(
                data=delete_key_request, user_api_key_dict=result

@@ -2038,7 +2046,9 @@ async def test_master_key_hashing(prisma_client):
        await new_team(
            NewTeamRequest(team_id=_team_id),
            user_api_key_dict=UserAPIKeyAuth(
                user_role=LitellmUserRoles.PROXY_ADMIN,
                api_key="sk-1234",
                user_id="1234",
            ),
        )

@@ -2076,7 +2086,7 @@ async def test_reset_spend_authentication(prisma_client):
    """
    1. Test master key can access this route -> ONLY MASTER KEY SHOULD BE ABLE TO RESET SPEND
    2. Test that non-master key gets rejected
    3. Test that non-master key with role == LitellmUserRoles.PROXY_ADMIN or admin gets rejected
    """

    print("prisma client=", prisma_client)

@@ -2121,10 +2131,10 @@ async def test_reset_spend_authentication(prisma_client):
            in e.message
        )

    # Test 3 - Non-Master Key with role == LitellmUserRoles.PROXY_ADMIN or admin
    _response = await new_user(
        data=NewUserRequest(
            user_role=LitellmUserRoles.PROXY_ADMIN,
            tpm_limit=20,
        )
    )

@@ -2174,7 +2184,9 @@ async def test_create_update_team(prisma_client):
            rpm_limit=20,
        ),
        user_api_key_dict=UserAPIKeyAuth(
            user_role=LitellmUserRoles.PROXY_ADMIN,
            api_key="sk-1234",
            user_id="1234",
        ),
    )

@@ -2200,7 +2212,9 @@ async def test_create_update_team(prisma_client):
            rpm_limit=30,
        ),
        user_api_key_dict=UserAPIKeyAuth(
            user_role=LitellmUserRoles.PROXY_ADMIN,
            api_key="sk-1234",
            user_id="1234",
        ),
    )
litellm/tests/test_openai_batches.py  (new file, 161 lines)
@@ -0,0 +1,161 @@
# What is this?
## Unit Tests for OpenAI Batches API
import sys, os, json
import traceback
import asyncio
from dotenv import load_dotenv

load_dotenv()
sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest, logging, asyncio
import litellm
from litellm import (
    create_batch,
    create_file,
)
import time


def test_create_batch():
    """
    1. Create File for Batch completion
    2. Create Batch Request
    3. Retrieve the specific batch
    """
    file_name = "openai_batch_completions.jsonl"
    _current_dir = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(_current_dir, file_name)

    file_obj = litellm.create_file(
        file=open(file_path, "rb"),
        purpose="batch",
        custom_llm_provider="openai",
    )
    print("Response from creating file=", file_obj)

    batch_input_file_id = file_obj.id
    assert (
        batch_input_file_id is not None
    ), "Failed to create file, expected a non null file_id but got {batch_input_file_id}"

    create_batch_response = litellm.create_batch(
        completion_window="24h",
        endpoint="/v1/chat/completions",
        input_file_id=batch_input_file_id,
        custom_llm_provider="openai",
        metadata={"key1": "value1", "key2": "value2"},
    )

    print("response from litellm.create_batch=", create_batch_response)

    assert (
        create_batch_response.id is not None
    ), f"Failed to create batch, expected a non null batch_id but got {create_batch_response.id}"
    assert (
        create_batch_response.endpoint == "/v1/chat/completions"
    ), f"Failed to create batch, expected endpoint to be /v1/chat/completions but got {create_batch_response.endpoint}"
    assert (
        create_batch_response.input_file_id == batch_input_file_id
    ), f"Failed to create batch, expected input_file_id to be {batch_input_file_id} but got {create_batch_response.input_file_id}"

    retrieved_batch = litellm.retrieve_batch(
        batch_id=create_batch_response.id, custom_llm_provider="openai"
    )
    print("retrieved batch=", retrieved_batch)
    # just assert that we retrieved a non None batch

    assert retrieved_batch.id == create_batch_response.id

    file_content = litellm.file_content(
        file_id=batch_input_file_id, custom_llm_provider="openai"
    )

    result = file_content.content

    result_file_name = "batch_job_results_furniture.jsonl"

    with open(result_file_name, "wb") as file:
        file.write(result)

    pass


@pytest.mark.asyncio()
async def test_async_create_batch():
    """
    1. Create File for Batch completion
    2. Create Batch Request
    3. Retrieve the specific batch
    """
    print("Testing async create batch")

    file_name = "openai_batch_completions.jsonl"
    _current_dir = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(_current_dir, file_name)
    file_obj = await litellm.acreate_file(
        file=open(file_path, "rb"),
        purpose="batch",
        custom_llm_provider="openai",
    )
    print("Response from creating file=", file_obj)

    batch_input_file_id = file_obj.id
    assert (
        batch_input_file_id is not None
    ), "Failed to create file, expected a non null file_id but got {batch_input_file_id}"

    create_batch_response = await litellm.acreate_batch(
        completion_window="24h",
        endpoint="/v1/chat/completions",
        input_file_id=batch_input_file_id,
        custom_llm_provider="openai",
        metadata={"key1": "value1", "key2": "value2"},
    )

    print("response from litellm.create_batch=", create_batch_response)

    assert (
        create_batch_response.id is not None
    ), f"Failed to create batch, expected a non null batch_id but got {create_batch_response.id}"
    assert (
        create_batch_response.endpoint == "/v1/chat/completions"
    ), f"Failed to create batch, expected endpoint to be /v1/chat/completions but got {create_batch_response.endpoint}"
    assert (
        create_batch_response.input_file_id == batch_input_file_id
    ), f"Failed to create batch, expected input_file_id to be {batch_input_file_id} but got {create_batch_response.input_file_id}"

    await asyncio.sleep(1)

    retrieved_batch = await litellm.aretrieve_batch(
        batch_id=create_batch_response.id, custom_llm_provider="openai"
    )
    print("retrieved batch=", retrieved_batch)
    # just assert that we retrieved a non None batch

    assert retrieved_batch.id == create_batch_response.id

    # try to get file content for our original file

    file_content = await litellm.afile_content(
        file_id=batch_input_file_id, custom_llm_provider="openai"
    )

    print("file content = ", file_content)

    # # write this file content to a file
    # with open("file_content.json", "w") as f:
    #     json.dump(file_content, f)


def test_retrieve_batch():
    pass


def test_cancel_batch():
    pass


def test_list_batch():
    pass
@@ -97,6 +97,18 @@ def test_databricks_optional_params():
    assert "user" not in optional_params


def test_azure_ai_mistral_optional_params():
    litellm.drop_params = True
    optional_params = get_optional_params(
        model="mistral-large-latest",
        user="John",
        custom_llm_provider="openai",
        max_tokens=10,
        temperature=0.2,
    )
    assert "user" not in optional_params


def test_azure_gpt_optional_params_gpt_vision():
    # for OpenAI, Azure all extra params need to get passed as extra_body to OpenAI python. We assert we actually set extra_body here
    optional_params = litellm.utils.get_optional_params(
@@ -19,6 +19,25 @@ import os, httpx
load_dotenv()


def test_router_sensitive_keys():
    try:
        router = Router(
            model_list=[
                {
                    "model_name": "gpt-3.5-turbo",  # openai model name
                    "litellm_params": {  # params for litellm completion/embedding call
                        "model": "azure/chatgpt-v-2",
                        "api_key": "special-key",
                    },
                    "model_info": {"id": 12345},
                },
            ],
        )
    except Exception as e:
        print(f"error msg - {str(e)}")
        assert "special-key" not in str(e)


@pytest.mark.parametrize("num_retries", [None, 2])
@pytest.mark.parametrize("max_retries", [None, 4])
def test_router_num_retries_init(num_retries, max_retries):
@@ -19,8 +19,141 @@ import os, httpx
load_dotenv()


@pytest.mark.parametrize("mode", ["all_responses", "fastest_response"])
@pytest.mark.asyncio
async def test_batch_completion_multiple_models(mode):
    litellm.set_verbose = True

    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                },
            },
            {
                "model_name": "groq-llama",
                "litellm_params": {
                    "model": "groq/llama3-8b-8192",
                },
            },
        ]
    )

    if mode == "all_responses":
        response = await router.abatch_completion(
            models=["gpt-3.5-turbo", "groq-llama"],
            messages=[
                {"role": "user", "content": "is litellm becoming a better product ?"}
            ],
            max_tokens=15,
        )

        print(response)
        assert len(response) == 2

        models_in_responses = []
        for individual_response in response:
            _model = individual_response["model"]
            models_in_responses.append(_model)

        # assert both models are different
        assert models_in_responses[0] != models_in_responses[1]
    elif mode == "fastest_response":
        from openai.types.chat.chat_completion import ChatCompletion

        response = await router.abatch_completion_fastest_response(
            model="gpt-3.5-turbo, groq-llama",
            messages=[
                {"role": "user", "content": "is litellm becoming a better product ?"}
            ],
            max_tokens=15,
        )

        ChatCompletion.model_validate(response.model_dump(), strict=True)


@pytest.mark.asyncio
async def test_batch_completion_fastest_response_unit_test():
    """
    Unit test to confirm fastest response will always return the response which arrives earliest.

    2 models -> 1 is cached, the other is a real llm api call => assert cached response always returned
    """
    litellm.set_verbose = True

    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt-4",
                "litellm_params": {
                    "model": "gpt-4",
                },
                "model_info": {"id": "1"},
            },
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "mock_response": "This is a fake response",
                },
                "model_info": {"id": "2"},
            },
        ]
    )

    response = await router.abatch_completion_fastest_response(
        model="gpt-4, gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": "is litellm becoming a better product ?"}
        ],
        max_tokens=500,
    )

    assert response._hidden_params["model_id"] == "2"
    assert response.choices[0].message.content == "This is a fake response"
    print(f"response: {response}")


@pytest.mark.asyncio
async def test_batch_completion_fastest_response_streaming():
    litellm.set_verbose = True

    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                },
            },
            {
                "model_name": "groq-llama",
                "litellm_params": {
                    "model": "groq/llama3-8b-8192",
                },
            },
        ]
    )

    from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

    response = await router.abatch_completion_fastest_response(
        model="gpt-3.5-turbo, groq-llama",
        messages=[
            {"role": "user", "content": "is litellm becoming a better product ?"}
        ],
        max_tokens=15,
        stream=True,
    )

    async for chunk in response:
        ChatCompletionChunk.model_validate(chunk.model_dump(), strict=True)


@pytest.mark.asyncio
async def test_batch_completion_multiple_models_multiple_messages():
    litellm.set_verbose = True

    router = litellm.Router(

@@ -43,18 +176,21 @@ async def test_batch_completion_multiple_models():
    response = await router.abatch_completion(
        models=["gpt-3.5-turbo", "groq-llama"],
        messages=[
            [{"role": "user", "content": "is litellm becoming a better product ?"}],
            [{"role": "user", "content": "who is this"}],
        ],
        max_tokens=15,
    )

    print("response from batches =", response)
    assert len(response) == 2
    assert len(response[0]) == 2
    assert isinstance(response[0][0], litellm.ModelResponse)

    # models_in_responses = []
    # for individual_response in response:
    #     _model = individual_response["model"]
    #     models_in_responses.append(_model)

    # # assert both models are different
    # assert models_in_responses[0] != models_in_responses[1]
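The fastest-response tests above pass a comma-separated model string and expect whichever deployment answers first. A minimal sketch of the underlying race, assuming it is implemented with `asyncio.wait(..., return_when=FIRST_COMPLETED)` (an assumption about the router internals, shown with a generic `call_model` coroutine):

```python
import asyncio


async def fastest_response(call_model, model_names, **kwargs):
    """Race one task per model and return whichever completes first (illustrative)."""
    tasks = [asyncio.create_task(call_model(model=m, **kwargs)) for m in model_names]
    done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
    for task in pending:
        task.cancel()  # the slower deployments are no longer needed
    return done.pop().result()


# usage sketch: await fastest_response(router_acompletion, ["gpt-4", "gpt-3.5-turbo"], messages=[...])
```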
@@ -3,7 +3,7 @@

import sys, os, asyncio
import traceback
import time, pytest, uuid
from pydantic import BaseModel
from typing import Tuple

@@ -241,203 +241,138 @@ def test_completion_azure_stream_content_filter_no_delta():
    """
    try:
        chunks = [
            {
                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
                "choices": [
                    {
                        "delta": {"content": "", "role": "assistant"},
                        "finish_reason": None,
                        "index": 0,
                    }
                ],
                "created": 1716563849,
                "model": "gpt-4o-2024-05-13",
                "object": "chat.completion.chunk",
                "system_fingerprint": "fp_5f4bad809a",
            },
            {
                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
                "choices": [
                    {"delta": {"content": "This"}, "finish_reason": None, "index": 0}
                ],
                "created": 1716563849,
                "model": "gpt-4o-2024-05-13",
                "object": "chat.completion.chunk",
                "system_fingerprint": "fp_5f4bad809a",
            },
            # ... identical chunks follow whose deltas are " is", " a", " dummy", and " response" ...
            {
                "id": "",
                "choices": [
                    {
                        "finish_reason": None,
                        "index": 0,
                        "content_filter_offsets": {
                            "check_offset": 35159,
                            "start_offset": 35159,
                            "end_offset": 36150,
                        },
                        "content_filter_results": {
                            "hate": {"filtered": False, "severity": "safe"},
                            "self_harm": {"filtered": False, "severity": "safe"},
                            "sexual": {"filtered": False, "severity": "safe"},
                            "violence": {"filtered": False, "severity": "safe"},
                        },
                    }
                ],
                "created": 0,
                "model": "",
                "object": "",
            },
            {
                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
                "choices": [
                    {"delta": {"content": "."}, "finish_reason": None, "index": 0}
                ],
                "created": 1716563849,
                "model": "gpt-4o-2024-05-13",
                "object": "chat.completion.chunk",
                "system_fingerprint": "fp_5f4bad809a",
            },
            {
                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
                "choices": [{"delta": {}, "finish_reason": "stop", "index": 0}],
                "created": 1716563849,
                "model": "gpt-4o-2024-05-13",
                "object": "chat.completion.chunk",
                "system_fingerprint": "fp_5f4bad809a",
            },
            {
                "id": "",
                "choices": [
                    {
                        "finish_reason": None,
                        "index": 0,
                        "content_filter_offsets": {
                            "check_offset": 36150,
                            "start_offset": 36060,
                            "end_offset": 37029,
                        },
                        "content_filter_results": {
                            "hate": {"filtered": False, "severity": "safe"},
                            "self_harm": {"filtered": False, "severity": "safe"},
                            "sexual": {"filtered": False, "severity": "safe"},
                            "violence": {"filtered": False, "severity": "safe"},
                        },
                    }
                ],
                "created": 0,
                "model": "",
                "object": "",
            },
        ]

        chunk_list = []

@@ -1449,29 +1384,68 @@ def test_bedrock_claude_3_streaming():
        pytest.fail(f"Error occurred: {e}")


@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_claude_3_streaming_finish_reason(sync_mode):
    try:
        import threading

        litellm.set_verbose = True
        messages = [
            {"role": "system", "content": "Be helpful"},
            {"role": "user", "content": "What do you know?"},
        ]

        def sync_test_streaming():
            response: litellm.CustomStreamWrapper = litellm.acompletion(  # type: ignore
                model="claude-3-opus-20240229",
                messages=messages,
                stream=True,
                max_tokens=10,
            )
            complete_response = ""
            # Add any assertions here to-check the response
            num_finish_reason = 0
            for chunk in response:
                print(f"chunk: {chunk}")
                if isinstance(chunk, ModelResponse):
                    if chunk.choices[0].finish_reason is not None:
                        num_finish_reason += 1
            assert num_finish_reason == 1

        async def test_streaming():
            response: litellm.CustomStreamWrapper = await litellm.acompletion(  # type: ignore
                model="claude-3-opus-20240229",
                messages=messages,
                stream=True,
                max_tokens=10,
            )
            complete_response = ""
            # Add any assertions here to-check the response
            num_finish_reason = 0
            async for chunk in response:
                print(f"chunk: {chunk}")
                if isinstance(chunk, ModelResponse):
                    if chunk.choices[0].finish_reason is not None:
                        num_finish_reason += 1
            assert num_finish_reason == 1

        tasks = []
        for _ in range(2):
            if sync_mode == False:
                tasks.append(test_streaming())
            else:
                thread = threading.Thread(target=sync_test_streaming)
                thread.start()
                tasks.append(thread)

        if sync_mode == False:
            await asyncio.gather(*tasks)
        else:
            # Wait for all threads to complete
            for thread in tasks:
                thread.join()

    except RateLimitError:
        pass
    except Exception as e:
@@ -1,49 +1,35 @@
import sys
import os
import time
import pytest
import litellm
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
from traceloop.sdk import Traceloop

sys.path.insert(0, os.path.abspath("../.."))


@pytest.fixture()
def exporter():
    exporter = InMemorySpanExporter()
    Traceloop.init(
        app_name="test_litellm",
        disable_batch=True,
        exporter=exporter,
    )
    litellm.success_callback = ["traceloop"]
    litellm.set_verbose = True

    return exporter


@pytest.mark.parametrize("model", ["claude-instant-1.2", "gpt-3.5-turbo"])
def test_traceloop_logging(exporter, model):

    litellm.completion(
        model=model,
        messages=[{"role": "user", "content": "This is a test"}],
        max_tokens=1000,
        temperature=0.7,
        timeout=5,
    )
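The rewritten fixture wires an OpenTelemetry `InMemorySpanExporter` into Traceloop so a test can inspect the spans the `traceloop` success callback emits. A sketch of such an assertion, assuming the exporter's `get_finished_spans()` API and litellm's `mock_response` to avoid a real API call (span naming is not verified against traceloop here):

```python
def test_traceloop_span_emitted(exporter):
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "This is a test"}],
        mock_response="hi",  # skip the real OpenAI call
    )
    spans = exporter.get_finished_spans()  # InMemorySpanExporter keeps ended spans in memory
    assert len(spans) >= 1
```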
@ -1,6 +1,6 @@
|
||||||
from typing import List, Optional, Union, Iterable
|
from typing import List, Optional, Union, Iterable
|
||||||
|
|
||||||
from pydantic import BaseModel, validator
|
from pydantic import BaseModel, ConfigDict, validator
|
||||||
|
|
||||||
from typing_extensions import Literal, Required, TypedDict
|
from typing_extensions import Literal, Required, TypedDict
|
||||||
|
|
||||||
|
@ -191,6 +191,4 @@ class CompletionRequest(BaseModel):
|
||||||
api_key: Optional[str] = None
|
api_key: Optional[str] = None
|
||||||
model_list: Optional[List[str]] = None
|
model_list: Optional[List[str]] = None
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(protected_namespaces=(), extra="allow")
|
||||||
extra = "allow"
|
|
||||||
protected_namespaces = ()
|
|
||||||
|
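The recurring change across these type modules is the pydantic v2 migration: the nested `class Config` is replaced by a `model_config = ConfigDict(...)` assignment. A minimal sketch of the two styles side by side, assuming pydantic v2 is installed (the class names here are illustrative, not from the repo):

```python
from pydantic import BaseModel, ConfigDict


class RequestV1Style(BaseModel):
    model: str

    # pydantic v1 style: nested Config class (what this diff removes)
    class Config:
        extra = "allow"
        protected_namespaces = ()


class RequestV2Style(BaseModel):
    model: str

    # pydantic v2 style: model_config replaces the nested Config class
    model_config = ConfigDict(extra="allow", protected_namespaces=())


# extra="allow" keeps unknown fields; protected_namespaces=() silences
# warnings for field names that start with "model_".
req = RequestV2Style(model="gpt-3.5-turbo", custom_field=1)
```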
@@ -1,6 +1,6 @@
 from typing import List, Optional, Union

-from pydantic import BaseModel, validator
+from pydantic import BaseModel, ConfigDict


 class EmbeddingRequest(BaseModel):

@@ -18,6 +18,4 @@ class EmbeddingRequest(BaseModel):
     litellm_logging_obj: Optional[dict] = None
     logger_fn: Optional[str] = None

-    class Config:
-        # allow kwargs
-        extra = "allow"
+    model_config = ConfigDict(extra="allow")

@@ -6,9 +6,8 @@ from typing import (
     Literal,
     Iterable,
 )
-from typing_extensions import override, Required
+from typing_extensions import override, Required, Dict
 from pydantic import BaseModel

 from openai.types.beta.threads.message_content import MessageContent
 from openai.types.beta.threads.message import Message as OpenAIMessage
 from openai.types.beta.thread_create_params import (

@@ -18,8 +17,23 @@ from openai.types.beta.assistant_tool_param import AssistantToolParam
 from openai.types.beta.threads.run import Run
 from openai.types.beta.assistant import Assistant
 from openai.pagination import SyncCursorPage
+from os import PathLike
+from openai.types import FileObject, Batch
+from openai._legacy_response import HttpxBinaryResponseContent
+from typing import TypedDict, List, Optional, Tuple, Mapping, IO
-from typing import TypedDict, List, Optional
+
+FileContent = Union[IO[bytes], bytes, PathLike]
+
+FileTypes = Union[
+    # file (or bytes)
+    FileContent,
+    # (filename, file (or bytes))
+    Tuple[Optional[str], FileContent],
+    # (filename, file (or bytes), content_type)
+    Tuple[Optional[str], FileContent, Optional[str]],
+    # (filename, file (or bytes), content_type, headers)
+    Tuple[Optional[str], FileContent, Optional[str], Mapping[str, str]],
+]


 class NotGiven:
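The new `FileTypes` union mirrors the shapes the OpenAI SDK accepts for file uploads. A small sketch of values that satisfy each branch (the file names and content type below are made up for illustration):

```python
from pathlib import Path

raw_bytes = b'{"custom_id": "req-1"}'                          # FileContent: bytes
path_like = Path("batch_input.jsonl")                          # FileContent: PathLike
named = ("batch_input.jsonl", raw_bytes)                       # (filename, file)
typed = ("batch_input.jsonl", raw_bytes, "application/jsonl")  # + content_type
with_headers = (
    "batch_input.jsonl",
    raw_bytes,
    "application/jsonl",
    {"x-example-header": "1"},                                 # + headers mapping
)
```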
@@ -146,3 +160,96 @@ class Thread(BaseModel):
     object: Literal["thread"]
     """The object type, which is always `thread`."""
+
+
+# OpenAI Files Types
+class CreateFileRequest(TypedDict, total=False):
+    """
+    CreateFileRequest
+    Used by Assistants API, Batches API, and Fine-Tunes API
+
+    Required Params:
+        file: FileTypes
+        purpose: Literal['assistants', 'batch', 'fine-tune']
+
+    Optional Params:
+        extra_headers: Optional[Dict[str, str]]
+        extra_body: Optional[Dict[str, str]] = None
+        timeout: Optional[float] = None
+    """
+
+    file: FileTypes
+    purpose: Literal["assistants", "batch", "fine-tune"]
+    extra_headers: Optional[Dict[str, str]]
+    extra_body: Optional[Dict[str, str]]
+    timeout: Optional[float]
+
+
+class FileContentRequest(TypedDict, total=False):
+    """
+    FileContentRequest
+    Used by Assistants API, Batches API, and Fine-Tunes API
+
+    Required Params:
+        file_id: str
+
+    Optional Params:
+        extra_headers: Optional[Dict[str, str]]
+        extra_body: Optional[Dict[str, str]] = None
+        timeout: Optional[float] = None
+    """
+
+    file_id: str
+    extra_headers: Optional[Dict[str, str]]
+    extra_body: Optional[Dict[str, str]]
+    timeout: Optional[float]
+
+
+# OpenAI Batches Types
+class CreateBatchRequest(TypedDict, total=False):
+    """
+    CreateBatchRequest
+    """
+
+    completion_window: Literal["24h"]
+    endpoint: Literal["/v1/chat/completions", "/v1/embeddings"]
+    input_file_id: str
+    metadata: Optional[Dict[str, str]]
+    extra_headers: Optional[Dict[str, str]]
+    extra_body: Optional[Dict[str, str]]
+    timeout: Optional[float]
+
+
+class RetrieveBatchRequest(TypedDict, total=False):
+    """
+    RetrieveBatchRequest
+    """
+
+    batch_id: str
+    extra_headers: Optional[Dict[str, str]]
+    extra_body: Optional[Dict[str, str]]
+    timeout: Optional[float]
+
+
+class CancelBatchRequest(TypedDict, total=False):
+    """
+    CancelBatchRequest
+    """
+
+    batch_id: str
+    extra_headers: Optional[Dict[str, str]]
+    extra_body: Optional[Dict[str, str]]
+    timeout: Optional[float]
+
+
+class ListBatchRequest(TypedDict, total=False):
+    """
+    ListBatchRequest - List your organization's batches
+    Calls https://api.openai.com/v1/batches
+    """
+
+    after: Union[str, NotGiven]
+    limit: Union[int, NotGiven]
+    extra_headers: Optional[Dict[str, str]]
+    extra_body: Optional[Dict[str, str]]
+    timeout: Optional[float]

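Because these request types are `TypedDict`s, callers build them as plain dictionaries; the class only drives static type checking. A hypothetical sketch of constructing the file and batch requests above (the ids and metadata are placeholders, not real values):

```python
from typing import Any, Dict

# A file upload request for the Batches API: the "file" value is one of the
# FileTypes tuple forms, and "purpose" selects the batch workflow.
create_file_req: Dict[str, Any] = {
    "file": ("batch_input.jsonl", b'{"custom_id": "req-1"}'),
    "purpose": "batch",
}

# A batch creation request pointing at the uploaded file.
create_batch_req: Dict[str, Any] = {
    "completion_window": "24h",
    "endpoint": "/v1/chat/completions",
    "input_file_id": "file-abc123",      # placeholder id
    "metadata": {"team": "analytics"},   # optional
}
```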
@@ -1,12 +1,12 @@
 """
 litellm.Router Types - includes RouterConfig, UpdateRouterConfig, ModelInfo etc
 """

 from typing import List, Optional, Union, Dict, Tuple, Literal, TypedDict
 import uuid
 import enum
 import httpx
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 import datetime
 from .completion import CompletionRequest
 from .embedding import EmbeddingRequest

@@ -18,8 +18,7 @@ class ModelConfig(BaseModel):
     tpm: int
     rpm: int

-    class Config:
-        protected_namespaces = ()
+    model_config = ConfigDict(protected_namespaces=())


 class RouterConfig(BaseModel):

@@ -50,8 +49,7 @@ class RouterConfig(BaseModel):
         "latency-based-routing",
     ] = "simple-shuffle"

-    class Config:
-        protected_namespaces = ()
+    model_config = ConfigDict(protected_namespaces=())


 class UpdateRouterConfig(BaseModel):

@@ -71,17 +69,14 @@ class UpdateRouterConfig(BaseModel):
     fallbacks: Optional[List[dict]] = None
     context_window_fallbacks: Optional[List[dict]] = None

-    class Config:
-        protected_namespaces = ()
+    model_config = ConfigDict(protected_namespaces=())


 class ModelInfo(BaseModel):
     id: Optional[
         str
     ]  # Allow id to be optional on input, but it will always be present as a str in the model instance
-    db_model: bool = (
-        False  # used for proxy - to separate models which are stored in the db vs. config.
-    )
+    db_model: bool = False  # used for proxy - to separate models which are stored in the db vs. config.
     updated_at: Optional[datetime.datetime] = None
     updated_by: Optional[str] = None

@@ -99,8 +94,7 @@ class ModelInfo(BaseModel):
             id = str(id)
         super().__init__(id=id, **params)

-    class Config:
-        extra = "allow"
+    model_config = ConfigDict(extra="allow")

     def __contains__(self, key):
         # Define custom behavior for the 'in' operator

@ -155,6 +149,8 @@ class GenericLiteLLMParams(BaseModel):
|
||||||
input_cost_per_second: Optional[float] = None
|
input_cost_per_second: Optional[float] = None
|
||||||
output_cost_per_second: Optional[float] = None
|
output_cost_per_second: Optional[float] = None
|
||||||
|
|
||||||
|
model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
custom_llm_provider: Optional[str] = None,
|
custom_llm_provider: Optional[str] = None,
|
||||||
|
@ -184,7 +180,7 @@ class GenericLiteLLMParams(BaseModel):
|
||||||
output_cost_per_token: Optional[float] = None,
|
output_cost_per_token: Optional[float] = None,
|
||||||
input_cost_per_second: Optional[float] = None,
|
input_cost_per_second: Optional[float] = None,
|
||||||
output_cost_per_second: Optional[float] = None,
|
output_cost_per_second: Optional[float] = None,
|
||||||
**params
|
**params,
|
||||||
):
|
):
|
||||||
args = locals()
|
args = locals()
|
||||||
args.pop("max_retries", None)
|
args.pop("max_retries", None)
|
||||||
|
@ -195,10 +191,6 @@ class GenericLiteLLMParams(BaseModel):
|
||||||
max_retries = int(max_retries) # cast to int
|
max_retries = int(max_retries) # cast to int
|
||||||
super().__init__(max_retries=max_retries, **args, **params)
|
super().__init__(max_retries=max_retries, **args, **params)
|
||||||
|
|
||||||
class Config:
|
|
||||||
extra = "allow"
|
|
||||||
arbitrary_types_allowed = True
|
|
||||||
|
|
||||||
def __contains__(self, key):
|
def __contains__(self, key):
|
||||||
# Define custom behavior for the 'in' operator
|
# Define custom behavior for the 'in' operator
|
||||||
return hasattr(self, key)
|
return hasattr(self, key)
|
||||||
|
@ -222,6 +214,7 @@ class LiteLLM_Params(GenericLiteLLMParams):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
model: str
|
model: str
|
||||||
|
model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
@ -245,7 +238,7 @@ class LiteLLM_Params(GenericLiteLLMParams):
|
||||||
aws_access_key_id: Optional[str] = None,
|
aws_access_key_id: Optional[str] = None,
|
||||||
aws_secret_access_key: Optional[str] = None,
|
aws_secret_access_key: Optional[str] = None,
|
||||||
aws_region_name: Optional[str] = None,
|
aws_region_name: Optional[str] = None,
|
||||||
**params
|
**params,
|
||||||
):
|
):
|
||||||
args = locals()
|
args = locals()
|
||||||
args.pop("max_retries", None)
|
args.pop("max_retries", None)
|
||||||
|
@ -256,10 +249,6 @@ class LiteLLM_Params(GenericLiteLLMParams):
|
||||||
max_retries = int(max_retries) # cast to int
|
max_retries = int(max_retries) # cast to int
|
||||||
super().__init__(max_retries=max_retries, **args, **params)
|
super().__init__(max_retries=max_retries, **args, **params)
|
||||||
|
|
||||||
class Config:
|
|
||||||
extra = "allow"
|
|
||||||
arbitrary_types_allowed = True
|
|
||||||
|
|
||||||
def __contains__(self, key):
|
def __contains__(self, key):
|
||||||
# Define custom behavior for the 'in' operator
|
# Define custom behavior for the 'in' operator
|
||||||
return hasattr(self, key)
|
return hasattr(self, key)
|
||||||
|
@ -288,8 +277,7 @@ class updateDeployment(BaseModel):
|
||||||
litellm_params: Optional[updateLiteLLMParams] = None
|
litellm_params: Optional[updateLiteLLMParams] = None
|
||||||
model_info: Optional[ModelInfo] = None
|
model_info: Optional[ModelInfo] = None
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(protected_namespaces=())
|
||||||
protected_namespaces = ()
|
|
||||||
|
|
||||||
|
|
||||||
class LiteLLMParamsTypedDict(TypedDict, total=False):
|
class LiteLLMParamsTypedDict(TypedDict, total=False):
|
||||||
|
@ -338,12 +326,14 @@ class Deployment(BaseModel):
|
||||||
litellm_params: LiteLLM_Params
|
litellm_params: LiteLLM_Params
|
||||||
model_info: ModelInfo
|
model_info: ModelInfo
|
||||||
|
|
||||||
|
model_config = ConfigDict(extra="allow", protected_namespaces=())
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
model_name: str,
|
model_name: str,
|
||||||
litellm_params: LiteLLM_Params,
|
litellm_params: LiteLLM_Params,
|
||||||
model_info: Optional[Union[ModelInfo, dict]] = None,
|
model_info: Optional[Union[ModelInfo, dict]] = None,
|
||||||
**params
|
**params,
|
||||||
):
|
):
|
||||||
if model_info is None:
|
if model_info is None:
|
||||||
model_info = ModelInfo()
|
model_info = ModelInfo()
|
||||||
|
@ -353,7 +343,7 @@ class Deployment(BaseModel):
|
||||||
model_info=model_info,
|
model_info=model_info,
|
||||||
model_name=model_name,
|
model_name=model_name,
|
||||||
litellm_params=litellm_params,
|
litellm_params=litellm_params,
|
||||||
**params
|
**params,
|
||||||
)
|
)
|
||||||
|
|
||||||
def to_json(self, **kwargs):
|
def to_json(self, **kwargs):
|
||||||
|
@ -363,10 +353,6 @@ class Deployment(BaseModel):
|
||||||
# if using pydantic v1
|
# if using pydantic v1
|
||||||
return self.dict(**kwargs)
|
return self.dict(**kwargs)
|
||||||
|
|
||||||
class Config:
|
|
||||||
extra = "allow"
|
|
||||||
protected_namespaces = ()
|
|
||||||
|
|
||||||
def __contains__(self, key):
|
def __contains__(self, key):
|
||||||
# Define custom behavior for the 'in' operator
|
# Define custom behavior for the 'in' operator
|
||||||
return hasattr(self, key)
|
return hasattr(self, key)
|
||||||
|
|
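The pattern shared by these router models is `extra="allow"` plus a dict-style `__contains__`, so arbitrary provider kwargs survive validation and can be probed with `in`. A minimal sketch under those assumptions (the field names and values below are illustrative):

```python
from pydantic import BaseModel, ConfigDict


class ParamsSketch(BaseModel):
    model: str

    # Same configuration the router types use after this diff.
    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

    def __contains__(self, key):
        # Dict-style membership test over attributes.
        return hasattr(self, key)


p = ParamsSketch(model="azure/gpt-35-turbo", api_key="sk-...", timeout=10)
assert "api_key" in p       # extra field kept by extra="allow"
assert "api_base" not in p  # never set
```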
@@ -18,7 +18,7 @@ from functools import wraps, lru_cache
 import datetime, time
 import tiktoken
 import uuid
-from pydantic import BaseModel
+from pydantic import BaseModel, ConfigDict
 import aiohttp
 import textwrap
 import logging

@@ -337,9 +337,7 @@ class HiddenParams(OpenAIObject):
     model_id: Optional[str] = None  # used in Router for individual deployments
     api_base: Optional[str] = None  # returns api base used for making completion call

-    class Config:
-        extra = "allow"
-        protected_namespaces = ()
+    model_config = ConfigDict(extra="allow", protected_namespaces=())

     def get(self, key, default=None):
         # Custom .get() method to access attributes with a default value if the attribute doesn't exist

@@ -1136,6 +1134,8 @@ class CallTypes(Enum):
     amoderation = "amoderation"
     atranscription = "atranscription"
     transcription = "transcription"
+    aspeech = "aspeech"
+    speech = "speech"


 # Logging function -> log the exact model details + what's being sent | Non-BlockingP

@@ -2027,6 +2027,7 @@ class Logging:
                             response_obj=result,
                             start_time=start_time,
                             end_time=end_time,
+                            user_id=kwargs.get("user", None),
                             print_verbose=print_verbose,
                         )
                     if callback == "s3":

@@ -2598,6 +2599,17 @@ class Logging:
                         level="ERROR",
                         kwargs=self.model_call_details,
                     )
+                if callback == "traceloop":
+                    traceloopLogger.log_event(
+                        start_time=start_time,
+                        end_time=end_time,
+                        response_obj=None,
+                        user_id=kwargs.get("user", None),
+                        print_verbose=print_verbose,
+                        status_message=str(exception),
+                        level="ERROR",
+                        kwargs=self.model_call_details,
+                    )
                 if callback == "prometheus":
                     global prometheusLogger
                     verbose_logger.debug("reaches prometheus for success logging!")

@@ -2993,6 +3005,10 @@ def function_setup(
         ):
             _file_name: BinaryIO = args[1] if len(args) > 1 else kwargs["file"]
             messages = "audio_file"
+        elif (
+            call_type == CallTypes.aspeech.value or call_type == CallTypes.speech.value
+        ):
+            messages = kwargs.get("input", "speech")
         stream = True if "stream" in kwargs and kwargs["stream"] == True else False
         logging_obj = Logging(
             model=model,

@@ -3334,6 +3350,8 @@ def client(original_function):
                 return result
             elif "atranscription" in kwargs and kwargs["atranscription"] == True:
                 return result
+            elif "aspeech" in kwargs and kwargs["aspeech"] == True:
+                return result

             ### POST-CALL RULES ###
             post_call_processing(original_response=result, model=model or None)

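The `function_setup` change above decides what to record as "messages" for call types that have no chat messages. A rough stand-in for that dispatch, with names simplified for illustration:

```python
# Transcription calls log the fixed marker "audio_file", while the new
# speech/aspeech call types log the TTS input text instead of chat messages.
def messages_for_logging(call_type: str, kwargs: dict):
    if call_type in ("atranscription", "transcription"):
        return "audio_file"
    elif call_type in ("aspeech", "speech"):
        return kwargs.get("input", "speech")
    return kwargs.get("messages")


assert messages_for_logging("speech", {"input": "hello world"}) == "hello world"
assert messages_for_logging("transcription", {}) == "audio_file"
```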
@ -5740,6 +5758,8 @@ def get_optional_params(
|
||||||
optional_params["stream"] = stream
|
optional_params["stream"] = stream
|
||||||
if temperature is not None:
|
if temperature is not None:
|
||||||
optional_params["temperature"] = temperature
|
optional_params["temperature"] = temperature
|
||||||
|
if seed is not None:
|
||||||
|
optional_params["seed"] = seed
|
||||||
if top_p is not None:
|
if top_p is not None:
|
||||||
optional_params["top_p"] = top_p
|
optional_params["top_p"] = top_p
|
||||||
if frequency_penalty is not None:
|
if frequency_penalty is not None:
|
||||||
|
@ -6392,6 +6412,8 @@ def get_supported_openai_params(
|
||||||
return ["stream", "temperature", "max_tokens"]
|
return ["stream", "temperature", "max_tokens"]
|
||||||
elif model.startswith("mistral"):
|
elif model.startswith("mistral"):
|
||||||
return ["max_tokens", "temperature", "stop", "top_p", "stream"]
|
return ["max_tokens", "temperature", "stop", "top_p", "stream"]
|
||||||
|
elif custom_llm_provider == "ollama":
|
||||||
|
return litellm.OllamaConfig().get_supported_openai_params()
|
||||||
elif custom_llm_provider == "ollama_chat":
|
elif custom_llm_provider == "ollama_chat":
|
||||||
return litellm.OllamaChatConfig().get_supported_openai_params()
|
return litellm.OllamaChatConfig().get_supported_openai_params()
|
||||||
elif custom_llm_provider == "anthropic":
|
elif custom_llm_provider == "anthropic":
|
||||||
|
@ -6561,16 +6583,6 @@ def get_supported_openai_params(
|
||||||
]
|
]
|
||||||
elif custom_llm_provider == "cloudflare":
|
elif custom_llm_provider == "cloudflare":
|
||||||
return ["max_tokens", "stream"]
|
return ["max_tokens", "stream"]
|
||||||
elif custom_llm_provider == "ollama":
|
|
||||||
return [
|
|
||||||
"max_tokens",
|
|
||||||
"stream",
|
|
||||||
"top_p",
|
|
||||||
"temperature",
|
|
||||||
"frequency_penalty",
|
|
||||||
"stop",
|
|
||||||
"response_format",
|
|
||||||
]
|
|
||||||
elif custom_llm_provider == "nlp_cloud":
|
elif custom_llm_provider == "nlp_cloud":
|
||||||
return [
|
return [
|
||||||
"max_tokens",
|
"max_tokens",
|
||||||
|
|
|
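The `seed` addition simply forwards the parameter into the provider payload when the caller sets it. A small sketch of the mapping, as a stripped-down stand-in for `get_optional_params` rather than the real function:

```python
def build_optional_params(temperature=None, seed=None, top_p=None):
    optional_params = {}
    if temperature is not None:
        optional_params["temperature"] = temperature
    if seed is not None:
        optional_params["seed"] = seed  # newly forwarded by this change
    if top_p is not None:
        optional_params["top_p"] = top_p
    return optional_params


assert build_optional_params(temperature=0.7, seed=42) == {"temperature": 0.7, "seed": 42}
```

The Ollama change in the same file replaces a hard-coded list of supported parameters with `litellm.OllamaConfig().get_supported_openai_params()`, so the provider config stays the single source of truth.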
@@ -1265,8 +1265,8 @@
     "max_tokens": 4096,
     "max_input_tokens": 200000,
     "max_output_tokens": 4096,
-    "input_cost_per_token": 0.0000015,
-    "output_cost_per_token": 0.0000075,
+    "input_cost_per_token": 0.000015,
+    "output_cost_per_token": 0.000075,
     "litellm_provider": "vertex_ai-anthropic_models",
     "mode": "chat",
     "supports_function_calling": true,

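The corrected prices are ten times the old values; per million tokens they come to $15 for input and $75 for output. A quick check of the arithmetic with a hypothetical 1,200-prompt-token, 300-completion-token call:

```python
# Per-token prices from the corrected entry above.
input_cost_per_token = 0.000015   # $15 per 1M input tokens
output_cost_per_token = 0.000075  # $75 per 1M output tokens

# Example call: 1,200 prompt tokens and 300 completion tokens (made-up numbers).
cost = 1200 * input_cost_per_token + 300 * output_cost_per_token
print(round(cost, 6))  # 0.0405 -> about 4 cents
```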
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.38.11"
+version = "1.39.5"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"

@@ -79,8 +79,10 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.38.11"
+version = "1.39.5"
 version_files = [
     "pyproject.toml:^version"
 ]

+[tool.mypy]
+plugins = "pydantic.mypy"

@@ -12,6 +12,7 @@ sys.path.insert(
     0, os.path.abspath("../")
 )  # Adds the parent directory to the system path
 import litellm
+from litellm.proxy._types import LitellmUserRoles


 async def generate_team(

@@ -731,7 +732,9 @@ async def test_key_delete_ui():

         # generate a admin UI key
         team = await generate_team(session=session)
-        admin_ui_key = await generate_user(session=session, user_role="proxy_admin")
+        admin_ui_key = await generate_user(
+            session=session, user_role=LitellmUserRoles.PROXY_ADMIN.value
+        )
         print(
             "trying to delete key=",
             key,

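Switching the test from the raw string "proxy_admin" to `LitellmUserRoles.PROXY_ADMIN.value` keeps role names in one enum while still sending the string the API expects. A minimal stand-in sketch (only `PROXY_ADMIN` is taken from this diff; the other member is illustrative):

```python
from enum import Enum


class LitellmUserRolesSketch(Enum):
    # Stand-in for litellm.proxy._types.LitellmUserRoles.
    PROXY_ADMIN = "proxy_admin"
    INTERNAL_USER = "internal_user"  # assumed, for illustration only


# .value still yields the raw string, so the request body is unchanged.
assert LitellmUserRolesSketch.PROXY_ADMIN.value == "proxy_admin"
```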
(Five file diffs are suppressed because one or more lines are too long.)

@@ -0,0 +1 @@ (new minified webpack chunk)
The added chunk bundles the public model-hub page: it reads the `key` query parameter via useSearchParams and renders the page with publicPage=true and premiumUser=false. The full minified source is omitted here.

(Three further file diffs are suppressed because their lines are too long.)
@@ -1 +1 @@ (minified webpack runtime chunk)
The only substantive change in this minified runtime is the hashed stylesheet it loads: static/css/5d93d4a9fa59d72f.css -> static/css/33354d8285fe572e.css. The full minified source is omitted here.

(Two further file diffs are suppressed because their lines are too long.)
@@ -1 +1 @@ (prerendered dashboard HTML, single minified line)
Only the hashed asset references change: the webpack runtime script webpack-e85084d25f9ae5e4.js -> webpack-766a329236c9a3f0.js, the stylesheet static/css/5d93d4a9fa59d72f.css -> static/css/33354d8285fe572e.css, the page chunk list (now including 359-f105a7fb61fe8110.js, 440-b9a05f116e1a696d.js, 134-4a7b43f992182f2c.js and page-f610596e5fb3cce4.js), and the Next.js build id D_ZUmMtLMPSa4aQQUJtKt -> PcGFjo5-03lHREJ3E0k6y. The full minified markup is omitted here.
@@ -1,7 +1,7 @@
 2:I[77831,[],""]
-3:I[39712,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","608","static/chunks/608-d128caa3cfe973c1.js","931","static/chunks/app/page-76d278f96a0e9768.js"],""]
+3:I[45014,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","359","static/chunks/359-f105a7fb61fe8110.js","440","static/chunks/440-b9a05f116e1a696d.js","134","static/chunks/134-4a7b43f992182f2c.js","931","static/chunks/app/page-f610596e5fb3cce4.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
(The long `0:` Flight payload line changes only its build id, D_ZUmMtLMPSa4aQQUJtKt -> PcGFjo5-03lHREJ3E0k6y, and its stylesheet hash, 5d93d4a9fa59d72f.css -> 33354d8285fe572e.css; the `6:` metadata line and `1:null` are unchanged.)

ui/litellm-dashboard/out/model_hub.html (new file, 1 line): diff suppressed because the line is too long.

ui/litellm-dashboard/out/model_hub.txt (new file, 7 lines)
@@ -0,0 +1,7 @@
+2:I[77831,[],""]
+3:I[87494,["359","static/chunks/359-f105a7fb61fe8110.js","134","static/chunks/134-4a7b43f992182f2c.js","418","static/chunks/app/model_hub/page-aa3c10cf9bb31255.js"],""]
+4:I[5613,[],""]
+5:I[31778,[],""]
+0:[... Flight payload for the /model_hub route, referencing build id PcGFjo5-03lHREJ3E0k6y and stylesheet static/css/33354d8285fe572e.css; full line omitted ...]
+6:[... standard LiteLLM Dashboard metadata (viewport, charset, title, description, favicon) ...]
+1:null

@@ -18,7 +18,7 @@ import Usage from "../components/usage";
 import { jwtDecode } from "jwt-decode";
 import { Typography } from "antd";

-export function formatUserRole(userRole: string) {
+function formatUserRole(userRole: string) {
   if (!userRole) {
     return "Undefined Role";
   }

@@ -58,6 +58,7 @@ import {
   User,
   setCallbacksCall,
   invitationCreateCall,
+  getPossibleUserRoles,
 } from "./networking";

 const AdminPanel: React.FC<AdminPanelProps> = ({

@@ -83,6 +84,9 @@ const AdminPanel: React.FC<AdminPanelProps> = ({
     useState(false);
   const router = useRouter();
   const [baseUrl, setBaseUrl] = useState("");
+  const [isInstructionsModalVisible, setIsInstructionsModalVisible] = useState(false);
+  const [possibleUIRoles, setPossibleUIRoles] = useState<null | Record<string, Record<string, string>>>(null);


   let nonSssoUrl;
   try {

@@ -163,6 +167,9 @@ const AdminPanel: React.FC<AdminPanelProps> = ({
        console.log(`proxy admins: ${proxyAdmins}`);
        console.log(`combinedList: ${combinedList}`);
        setAdmins(combinedList);
+
+       const availableUserRoles = await getPossibleUserRoles(accessToken);
+       setPossibleUIRoles(availableUserRoles);
      }
    };

@@ -435,7 +442,7 @@ const AdminPanel: React.FC<AdminPanelProps> = ({
                        ? member["user_id"]
                        : null}
                    </TableCell>
-                   <TableCell>{member["user_role"]}</TableCell>
+                   <TableCell> {possibleUIRoles?.[member?.user_role]?.ui_label || "-"}</TableCell>
                    <TableCell>
                      <Icon
                        icon={PencilAltIcon}

@@ -149,6 +149,12 @@ const ChatUI: React.FC<ChatUIProps> = ({
     });
   };

+  const handleKeyDown = (event: React.KeyboardEvent<HTMLInputElement>) => {
+    if (event.key === 'Enter') {
+      handleSendMessage();
+    }
+  };
+
   const handleSendMessage = async () => {
     if (inputMessage.trim() === "") return;

@@ -260,6 +266,7 @@ const ChatUI: React.FC<ChatUIProps> = ({
                 type="text"
                 value={inputMessage}
                 onChange={(e) => setInputMessage(e.target.value)}
+                onKeyDown={handleKeyDown} // Add this line
                 placeholder="Type your message..."
               />
               <Button

ui/litellm-dashboard/src/components/edit_user.tsx (new file, 138 lines)
@@ -0,0 +1,138 @@
+import { useEffect, useState } from 'react';
+import {
+  Dialog,
+  DialogPanel,
+  TextInput,
+  Button,
+  Select,
+  SelectItem,
+  Text,
+  Title,
+  Subtitle,
+} from '@tremor/react';
+
+import {
+  Button as Button2,
+  Modal,
+  Form,
+  Input,
+  Select as Select2,
+  InputNumber,
+  message,
+} from "antd";
+
+interface EditUserModalProps {
+  visible: boolean;
+  possibleUIRoles: null | Record<string, Record<string, string>>;
+  onCancel: () => void;
+  user: any;
+  onSubmit: (data: any) => void;
+}
+
+const EditUserModal: React.FC<EditUserModalProps> = ({ visible, possibleUIRoles, onCancel, user, onSubmit }) => {
+  const [editedUser, setEditedUser] = useState(user);
+  const [form] = Form.useForm();
+
+  useEffect(() => {
+    form.resetFields();
+  }, [user]);
+
+  const handleCancel = async () => {
+    form.resetFields();
+    onCancel();
+  };
+
+  const handleEditSubmit = async (formValues: Record<string, any>) => {
+    // Call API to update team with teamId and values
+    onSubmit(formValues);
+    form.resetFields();
+    onCancel();
+  };
+
+  if (!user) {
+    return null;
+  }
+
+  return (
+    <Modal
+      visible={visible}
+      onCancel={handleCancel}
+      footer={null}
+      title={"Edit User " + user.user_id}
+      width={1000}
+    >
+      <Form
+        form={form}
+        onFinish={handleEditSubmit}
+        initialValues={user} // Pass initial values here
+        labelCol={{ span: 8 }}
+        wrapperCol={{ span: 16 }}
+        labelAlign="left"
+      >
+        <>
+          <Form.Item
+            className="mt-8"
+            label="User Email"
+            tooltip="Email of the User"
+            name="user_email">
+            <TextInput />
+          </Form.Item>
+
+          <Form.Item
+            label="user_id"
+            name="user_id"
+            hidden={true}
+          >
+            <TextInput />
+          </Form.Item>
+
+          <Form.Item
+            label="User Role"
+            name="user_role"
+          >
+            <Select2>
+              {possibleUIRoles &&
+                Object.entries(possibleUIRoles).map(([role, { ui_label, description }]) => (
+                  <SelectItem key={role} value={role} title={ui_label}>
+                    <div className='flex'>
+                      {ui_label} <p className="ml-2" style={{ color: "gray", fontSize: "12px" }}>{description}</p>
+                    </div>
+                  </SelectItem>
+                ))}
+            </Select2>
+          </Form.Item>
+
+          <Form.Item
+            label="Spend (USD)"
+            name="spend"
+            tooltip="(float) - Spend of all LLM calls completed by this user"
+          >
+            <InputNumber min={0} step={1} />
+          </Form.Item>
+
+          <Form.Item
+            label="User Budget (USD)"
+            name="max_budget"
+            tooltip="(float) - Maximum budget of this user"
+          >
+            <InputNumber min={0} step={1} />
+          </Form.Item>
+
+          <div style={{ textAlign: "right", marginTop: "10px" }}>
+            <Button2 htmlType="submit">Save</Button2>
+          </div>
+        </>
+      </Form>
+    </Modal>
+  );
+};
+
+export default EditUserModal;
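For orientation, a minimal sketch of how a parent view might wire up this modal (the `view_users.tsx` changes further down do essentially this); the component and state names in the sketch are illustrative, not part of this diff:

```tsx
// Illustrative wiring only — UsersPanel and its state names are assumptions, not part of this PR.
import { useState } from "react";
import EditUserModal from "./edit_user";

const UsersPanel = ({ possibleUIRoles }: { possibleUIRoles: Record<string, Record<string, string>> }) => {
  const [editModalVisible, setEditModalVisible] = useState(false);
  const [selectedUser, setSelectedUser] = useState<any>(null);

  return (
    <EditUserModal
      visible={editModalVisible}
      possibleUIRoles={possibleUIRoles}
      onCancel={() => setEditModalVisible(false)}
      user={selectedUser}
      onSubmit={(updated) => {
        // Persist the edit (e.g. via a networking call) and refresh local state.
        console.log("edited user:", updated);
        setEditModalVisible(false);
      }}
    />
  );
};
```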
@@ -79,7 +79,7 @@ const Sidebar: React.FC<SidebarProps> = ({

           {userRole == "Admin" ? (
             <Menu.Item key="5" onClick={() => setPage("users")}>
-              <Text>Users</Text>
+              <Text>Internal Users</Text>
             </Menu.Item>
           ) : null}

@@ -91,7 +91,7 @@ const Sidebar: React.FC<SidebarProps> = ({

           {userRole == "Admin" ? (
             <Menu.Item key="9" onClick={() => setPage("budgets")}>
-              <Text>Rate Limits</Text>
+              <Text>Budgets</Text>
             </Menu.Item>
           ) : null}
@@ -49,6 +49,8 @@ import {
   getCallbacksCall,
   setCallbacksCall,
   modelSettingsCall,
+  adminGlobalActivityExceptions,
+  adminGlobalActivityExceptionsPerDeployment,
 } from "./networking";
 import { BarChart, AreaChart } from "@tremor/react";
 import {

@@ -109,6 +111,13 @@ interface RetryPolicyObject {
   [key: string]: { [retryPolicyKey: string]: number } | undefined;
 }

+interface GlobalExceptionActivityData {
+  sum_num_rate_limit_exceptions: number;
+  daily_data: { date: string; num_rate_limit_exceptions: number; }[];
+}
+
 //["OpenAI", "Azure OpenAI", "Anthropic", "Gemini (Google AI Studio)", "Amazon Bedrock", "OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)"]

 interface ProviderFields {

@@ -301,6 +310,9 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
     useState<RetryPolicyObject | null>(null);
   const [defaultRetry, setDefaultRetry] = useState<number>(0);

+  const [globalExceptionData, setGlobalExceptionData] = useState<GlobalExceptionActivityData>({} as GlobalExceptionActivityData);
+  const [globalExceptionPerDeployment, setGlobalExceptionPerDeployment] = useState<any[]>([]);
+
   function formatCreatedAt(createdAt: string | null) {
     if (createdAt) {
       const date = new Date(createdAt);

@@ -643,6 +655,29 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
         dateValue.to?.toISOString()
       );

+      const dailyExceptions = await adminGlobalActivityExceptions(
+        accessToken,
+        dateValue.from?.toISOString().split('T')[0],
+        dateValue.to?.toISOString().split('T')[0],
+        _initial_model_group,
+      );
+
+      setGlobalExceptionData(dailyExceptions);
+
+      const dailyExceptionsPerDeplyment = await adminGlobalActivityExceptionsPerDeployment(
+        accessToken,
+        dateValue.from?.toISOString().split('T')[0],
+        dateValue.to?.toISOString().split('T')[0],
+        _initial_model_group,
+      )
+
+      setGlobalExceptionPerDeployment(dailyExceptionsPerDeplyment);
+
+      console.log("dailyExceptions:", dailyExceptions);
+
+      console.log("dailyExceptionsPerDeplyment:", dailyExceptionsPerDeplyment);
+
       console.log("slowResponses:", slowResponses);

       setSlowResponsesData(slowResponses);

@@ -905,6 +940,30 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
       console.log("slowResponses:", slowResponses);

       setSlowResponsesData(slowResponses);
+
+      if (modelGroup) {
+        const dailyExceptions = await adminGlobalActivityExceptions(
+          accessToken,
+          startTime?.toISOString().split('T')[0],
+          endTime?.toISOString().split('T')[0],
+          modelGroup,
+        );
+
+        setGlobalExceptionData(dailyExceptions);
+
+        const dailyExceptionsPerDeplyment = await adminGlobalActivityExceptionsPerDeployment(
+          accessToken,
+          startTime?.toISOString().split('T')[0],
+          endTime?.toISOString().split('T')[0],
+          modelGroup,
+        )
+
+        setGlobalExceptionPerDeployment(dailyExceptionsPerDeplyment);
+      }
+
     } catch (error) {
       console.error("Failed to fetch model metrics", error);
     }

@@ -1475,7 +1534,8 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
                 )}
                 {selectedProvider != Providers.Bedrock &&
                   selectedProvider != Providers.Vertex_AI &&
-                  dynamicProviderForm === undefined && (
+                  (dynamicProviderForm === undefined ||
+                    dynamicProviderForm.fields.length == 0) && (
                     <Form.Item
                       rules={[{ required: true, message: "Required" }]}
                       label="API Key"

@@ -1777,18 +1837,110 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
                 </Card>
               </Col>
             </Grid>
-            <Card className="mt-4">
-              <Title>Exceptions per Model</Title>
-              <BarChart
-                className="h-72"
-                data={modelExceptions}
-                index="model"
-                categories={allExceptions}
-                stack={true}
-                colors={["indigo-300", "rose-200", "#ffcc33"]}
-                yAxisWidth={30}
-              />
-            </Card>
+            <Grid numItems={1} className="gap-2 w-full mt-2">
+              <Card>
+                <Title>All Up Rate Limit Errors (429) for {selectedModelGroup}</Title>
+                <Grid numItems={1}>
+                  <Col>
+                    <Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Num Rate Limit Errors { (globalExceptionData.sum_num_rate_limit_exceptions)}</Subtitle>
+                    <BarChart
+                      className="h-40"
+                      data={globalExceptionData.daily_data}
+                      index="date"
+                      colors={['rose']}
+                      categories={['num_rate_limit_exceptions']}
+                      onValueChange={(v) => console.log(v)}
+                    />
+                  </Col>
+                  <Col>
+                    {/* <BarChart
+                      className="h-40"
+                      data={modelExceptions}
+                      index="model"
+                      categories={allExceptions}
+                      stack={true}
+                      yAxisWidth={30}
+                    /> */}
+                  </Col>
+                </Grid>
+              </Card>
+              {
+                premiumUser ? (
+                  <>
+                    {globalExceptionPerDeployment.map((globalActivity, index) => (
+                      <Card key={index}>
+                        <Title>{globalActivity.api_base ? globalActivity.api_base : "Unknown API Base"}</Title>
+                        <Grid numItems={1}>
+                          <Col>
+                            <Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Num Rate Limit Errors (429) {(globalActivity.sum_num_rate_limit_exceptions)}</Subtitle>
+                            <BarChart
+                              className="h-40"
+                              data={globalActivity.daily_data}
+                              index="date"
+                              colors={['rose']}
+                              categories={['num_rate_limit_exceptions']}
+                              onValueChange={(v) => console.log(v)}
+                            />
+                          </Col>
+                        </Grid>
+                      </Card>
+                    ))}
+                  </>
+                ) :
+                  <>
+                    {globalExceptionPerDeployment && globalExceptionPerDeployment.length > 0 &&
+                      globalExceptionPerDeployment.slice(0, 1).map((globalActivity, index) => (
+                        <Card key={index}>
+                          <Title>✨ Rate Limit Errors by Deployment</Title>
+                          <p className="mb-2 text-gray-500 italic text-[12px]">Upgrade to see exceptions for all deployments</p>
+                          <Button variant="primary" className="mb-2">
+                            <a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank">
+                              Get Free Trial
+                            </a>
+                          </Button>
+                          <Card>
+                            <Title>{globalActivity.api_base}</Title>
+                            <Grid numItems={1}>
+                              <Col>
+                                <Subtitle
+                                  style={{
+                                    fontSize: "15px",
+                                    fontWeight: "normal",
+                                    color: "#535452",
+                                  }}
+                                >
+                                  Num Rate Limit Errors {(globalActivity.sum_num_rate_limit_exceptions)}
+                                </Subtitle>
+                                <BarChart
+                                  className="h-40"
+                                  data={globalActivity.daily_data}
+                                  index="date"
+                                  colors={['rose']}
+                                  categories={['num_rate_limit_exceptions']}
+                                  onValueChange={(v) => console.log(v)}
+                                />
+                              </Col>
+                            </Grid>
+                          </Card>
+                        </Card>
+                      ))}
+                  </>
+              }
+            </Grid>
           </TabPanel>
           <TabPanel>
             <div className="flex items-center">
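For reference, the payload these new charts render matches the `GlobalExceptionActivityData` interface declared above; the concrete values in this sketch are invented for illustration only:

```typescript
// Illustrative only — field names come from the GlobalExceptionActivityData interface above, values are made up.
const exampleExceptionData: GlobalExceptionActivityData = {
  sum_num_rate_limit_exceptions: 42,
  daily_data: [
    { date: "2024-05-20", num_rate_limit_exceptions: 10 },
    { date: "2024-05-21", num_rate_limit_exceptions: 32 },
  ],
};
```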
@@ -39,7 +39,9 @@ const Navbar: React.FC<NavbarProps> = ({

   // const userColors = require('./ui_colors.json') || {};
   const isLocal = process.env.NODE_ENV === "development";
+  const proxyBaseUrl = isLocal ? "http://localhost:4000" : null;
   const imageUrl = isLocal ? "http://localhost:4000/get_image" : "/get_image";
+  const logoutUrl = proxyBaseUrl ? `${proxyBaseUrl}` : `/`;

   const items: MenuProps["items"] = [
     {

@@ -52,6 +54,14 @@ const Navbar: React.FC<NavbarProps> = ({
         </>
       ),
     },
+    {
+      key: "2",
+      label: (
+        <Link href={logoutUrl}>
+          <p>Logout</p>
+        </Link>
+      ),
+    }
   ];

   return (
@@ -1270,6 +1270,100 @@ export const adminGlobalActivityPerModel = async (
   }
 };

+export const adminGlobalActivityExceptions = async (
+  accessToken: String,
+  startTime: String | undefined,
+  endTime: String | undefined,
+  modelGroup: String,
+) => {
+  try {
+    let url = proxyBaseUrl
+      ? `${proxyBaseUrl}/global/activity/exceptions`
+      : `/global/activity/exceptions`;
+
+    if (startTime && endTime) {
+      url += `?start_date=${startTime}&end_date=${endTime}`;
+    }
+
+    if (modelGroup) {
+      url += `&model_group=${modelGroup}`;
+    }
+
+    const requestOptions: {
+      method: string;
+      headers: {
+        Authorization: string;
+      };
+    } = {
+      method: "GET",
+      headers: {
+        Authorization: `Bearer ${accessToken}`,
+      },
+    };
+
+    const response = await fetch(url, requestOptions);
+
+    if (!response.ok) {
+      const errorData = await response.text();
+      throw new Error("Network response was not ok");
+    }
+    const data = await response.json();
+    console.log(data);
+    return data;
+  } catch (error) {
+    console.error("Failed to fetch spend data:", error);
+    throw error;
+  }
+};
+
+export const adminGlobalActivityExceptionsPerDeployment = async (
+  accessToken: String,
+  startTime: String | undefined,
+  endTime: String | undefined,
+  modelGroup: String,
+) => {
+  try {
+    let url = proxyBaseUrl
+      ? `${proxyBaseUrl}/global/activity/exceptions/deployment`
+      : `/global/activity/exceptions/deployment`;
+
+    if (startTime && endTime) {
+      url += `?start_date=${startTime}&end_date=${endTime}`;
+    }
+
+    if (modelGroup) {
+      url += `&model_group=${modelGroup}`;
+    }
+
+    const requestOptions: {
+      method: string;
+      headers: {
+        Authorization: string;
+      };
+    } = {
+      method: "GET",
+      headers: {
+        Authorization: `Bearer ${accessToken}`,
+      },
+    };
+
+    const response = await fetch(url, requestOptions);
+
+    if (!response.ok) {
+      const errorData = await response.text();
+      throw new Error("Network response was not ok");
+    }
+    const data = await response.json();
+    console.log(data);
+    return data;
+  } catch (error) {
+    console.error("Failed to fetch spend data:", error);
+    throw error;
+  }
+};
+
 export const adminTopModelsCall = async (accessToken: String) => {
   try {
     let url = proxyBaseUrl
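A minimal sketch of calling the two new helpers from a dashboard component (the model-dashboard changes above do essentially this); the token and date strings here are placeholders:

```typescript
// Sketch only — accessToken and dates are placeholders; the helpers are the ones added above.
import {
  adminGlobalActivityExceptions,
  adminGlobalActivityExceptionsPerDeployment,
} from "./networking";

const loadExceptionMetrics = async (accessToken: string, modelGroup: string) => {
  // Dates are passed as YYYY-MM-DD strings, matching the toISOString().split('T')[0] usage above.
  const dailyExceptions = await adminGlobalActivityExceptions(
    accessToken, "2024-05-01", "2024-05-31", modelGroup,
  );
  const perDeployment = await adminGlobalActivityExceptionsPerDeployment(
    accessToken, "2024-05-01", "2024-05-31", modelGroup,
  );
  return { dailyExceptions, perDeployment };
};
```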
@@ -1465,6 +1559,34 @@ export const userGetAllUsersCall = async (
   }
 };

+export const getPossibleUserRoles = async (
+  accessToken: String,
+) => {
+  try {
+    const url = proxyBaseUrl
+      ? `${proxyBaseUrl}/user/available_roles`
+      : `/user/available_roles`;
+    const response = await fetch(url, {
+      method: "GET",
+      headers: {
+        Authorization: `Bearer ${accessToken}`,
+        "Content-Type": "application/json",
+      },
+    });
+
+    if (!response.ok) {
+      const errorData = await response.text();
+      throw new Error("Network response was not ok");
+    }
+    const data = await response.json();
+    console.log("response from user/available_role", data);
+    return data;
+    // Handle success - you might want to update some state or UI based on the created key
+  } catch (error) {
+    throw error;
+  }
+};
+
 export const teamCreateCall = async (
   accessToken: string,
   formValues: Record<string, any> // Assuming formValues is an object
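Based on how the UI consumes this call (`possibleUIRoles?.[role]?.ui_label` plus a `description`), the `/user/available_roles` response is presumably a record keyed by role name; the role names and strings below are illustrative assumptions, not taken from this diff:

```typescript
// Assumed shape, inferred from possibleUIRoles usage in edit_user.tsx / view_users.tsx; values are illustrative.
const examplePossibleUIRoles: Record<string, Record<string, string>> = {
  proxy_admin: {
    ui_label: "Admin",
    description: "Full access to the proxy (illustrative text)",
  },
  internal_user: {
    ui_label: "Internal User",
    description: "Can create keys and view own spend (illustrative text)",
  },
};
```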
@@ -188,6 +188,43 @@ const Settings: React.FC<SettingsPageProps> = ({
     console.log("Selected values:", values);
   };

+  const handleSaveEmailSettings = () => {
+    if (!accessToken) {
+      return;
+    }
+
+    let updatedVariables: Record<string, string> = {};
+
+    alerts
+      .filter((alert) => alert.name === "email")
+      .forEach((alert) => {
+        Object.entries(alert.variables ?? {}).forEach(([key, value]) => {
+          const inputElement = document.querySelector(`input[name="${key}"]`) as HTMLInputElement;
+          if (inputElement && inputElement.value) {
+            updatedVariables[key] = inputElement?.value;
+          }
+        });
+      });
+
+    console.log("updatedVariables", updatedVariables);
+    //filter out null / undefined values for updatedVariables
+
+    const payload = {
+      general_settings: {
+        alerting: ["email"],
+      },
+      environment_variables: updatedVariables,
+    };
+    try {
+      setCallbacksCall(accessToken, payload);
+    } catch (error) {
+      message.error("Failed to update alerts: " + error, 20);
+    }
+
+    message.success("Email settings updated successfully");
+  }
+
   const handleSaveAlerts = () => {
     if (!accessToken) {
       return;
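For reference, a sketch of the payload this handler builds before passing it to `setCallbacksCall`; the keys mirror the SMTP fields rendered in the Email Alerts panel further down, and the values are placeholders:

```typescript
// Placeholder values — the environment_variables keys mirror the email settings fields shown in the panel below.
const examplePayload = {
  general_settings: {
    alerting: ["email"],
  },
  environment_variables: {
    SMTP_HOST: "smtp.resend.com",
    SMTP_PORT: "587",
    SMTP_USERNAME: "username",
    SMTP_PASSWORD: "********",
    SMTP_SENDER_EMAIL: "sender@berri.ai",
    TEST_EMAIL_ADDRESS: "info@berri.ai",
  },
};
```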
@@ -369,7 +406,8 @@ const Settings: React.FC<SettingsPageProps> = ({
           <TabList variant="line" defaultValue="1">
             <Tab value="1">Logging Callbacks</Tab>
             <Tab value="2">Alerting Types</Tab>
-            <Tab value="2">Alerting Settings</Tab>
+            <Tab value="3">Alerting Settings</Tab>
+            <Tab value="4">Email Alerts</Tab>
           </TabList>
           <TabPanels>
             <TabPanel>

@@ -526,6 +564,142 @@ const Settings: React.FC<SettingsPageProps> = ({
               premiumUser={premiumUser}
             />
           </TabPanel>
+          <TabPanel>
+            <Card>
+              <Title>Email Settings</Title>
+              <Text>
+                <a href="https://docs.litellm.ai/docs/proxy/email" target="_blank" style={{ color: "blue" }}> LiteLLM Docs: email alerts</a> <br/>
+              </Text>
+              <div className="flex w-full">
+                {alerts
+                  .filter((alert) => alert.name === "email")
+                  .map((alert, index) => (
+                    <TableCell key={index}>
+                      <ul>
+                        <Grid numItems={2}>
+                          {Object.entries(alert.variables ?? {}).map(([key, value]) => (
+                            <li key={key} className="mx-2 my-2">
+                              { premiumUser!= true && (key === "EMAIL_LOGO_URL" || key === "EMAIL_SUPPORT_CONTACT") ? (
+                                <div>
+                                  <a
+                                    href="https://forms.gle/W3U4PZpJGFHWtHyA9"
+                                    target="_blank"
+                                  >
+                                    <Text className="mt-2">
+                                      {" "}
+                                      ✨ {key}
+                                    </Text>
+                                  </a>
+                                  <TextInput
+                                    name={key}
+                                    defaultValue={value as string}
+                                    type="password"
+                                    disabled={true}
+                                    style={{ width: "400px" }}
+                                  />
+                                </div>
+                              ) : (
+                                <div>
+                                  <Text className="mt-2">{key}</Text>
+                                  <TextInput
+                                    name={key}
+                                    defaultValue={value as string}
+                                    type="password"
+                                    style={{ width: "400px" }}
+                                  />
+                                </div>
+                              )}
+
+                              {/* Added descriptions for input fields */}
+                              <p style={{ fontSize: "small", fontStyle: "italic" }}>
+                                {key === "SMTP_HOST" && (
+                                  <div style={{ color: "gray" }}>
+                                    Enter the SMTP host address, e.g. `smtp.resend.com`
+                                    <span style={{ color: "red" }}> Required * </span>
+                                  </div>
+                                )}
+
+                                {key === "SMTP_PORT" && (
+                                  <div style={{ color: "gray" }}>
+                                    Enter the SMTP port number, e.g. `587`
+                                    <span style={{ color: "red" }}> Required * </span>
+                                  </div>
+                                )}
+
+                                {key === "SMTP_USERNAME" && (
+                                  <div style={{ color: "gray" }}>
+                                    Enter the SMTP username, e.g. `username`
+                                    <span style={{ color: "red" }}> Required * </span>
+                                  </div>
+                                )}
+
+                                {key === "SMTP_PASSWORD" && (
+                                  <span style={{ color: "red" }}> Required * </span>
+                                )}
+
+                                {key === "SMTP_SENDER_EMAIL" && (
+                                  <div style={{ color: "gray" }}>
+                                    Enter the sender email address, e.g. `sender@berri.ai`
+                                    <span style={{ color: "red" }}> Required * </span>
+                                  </div>
+                                )}
+
+                                {key === "TEST_EMAIL_ADDRESS" && (
+                                  <div style={{ color: "gray" }}>
+                                    Email Address to send `Test Email Alert` to. example: `info@berri.ai`
+                                    <span style={{ color: "red" }}> Required * </span>
+                                  </div>
+                                )}
+                                {key === "EMAIL_LOGO_URL" && (
+                                  <div style={{ color: "gray" }}>
+                                    (Optional) Customize the Logo that appears in the email, pass a url to your logo
+                                  </div>
+                                )}
+                                {key === "EMAIL_SUPPORT_CONTACT" && (
+                                  <div style={{ color: "gray" }}>
+                                    (Optional) Customize the support email address that appears in the email. Default is support@berri.ai
+                                  </div>
+                                )}
+                              </p>
+                            </li>
+                          ))}
+                        </Grid>
+                      </ul>
+                    </TableCell>
+                  ))}
+              </div>
+
+              <Button
+                className="mt-2"
+                onClick={() => handleSaveEmailSettings()}
+              >
+                Save Changes
+              </Button>
+              <Button
+                onClick={() =>
+                  serviceHealthCheck(accessToken, "email")
+                }
+                className="mx-2"
+              >
+                Test Email Alerts
+              </Button>
+
+            </Card>
+          </TabPanel>
           </TabPanels>
         </TabGroup>
       </Grid>
@@ -162,6 +162,17 @@ const UsagePage: React.FC<UsagePageProps> = ({
   console.log("keys in usage", keys);
   console.log("premium user in usage", premiumUser);

+  function valueFormatterNumbers(number: number) {
+    const formatter = new Intl.NumberFormat('en-US', {
+      maximumFractionDigits: 0,
+      notation: 'compact',
+      compactDisplay: 'short',
+    });
+
+    return formatter.format(number);
+  }
+
   const updateEndUserData = async (startTime: Date | undefined, endTime: Date | undefined, uiSelectedKey: string | null) => {
     if (!startTime || !endTime || !accessToken) {
       return;
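Roughly, with compact "short" notation and fraction digits capped at 0, large counts collapse to abbreviated strings; the outputs below are approximate, since exact rounding depends on the Intl implementation:

```typescript
// Approximate outputs shown in comments — exact strings depend on the runtime's Intl.NumberFormat.
valueFormatterNumbers(950);      // "950"
valueFormatterNumbers(12000);    // "12K"
valueFormatterNumbers(3400000);  // roughly "3M"
```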
@@ -482,10 +493,11 @@ const UsagePage: React.FC<UsagePageProps> = ({
             <Title>All Up</Title>
             <Grid numItems={2}>
               <Col>
-                <Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>API Requests {globalActivity.sum_api_requests}</Subtitle>
+                <Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>API Requests { valueFormatterNumbers(globalActivity.sum_api_requests)}</Subtitle>
                 <AreaChart
                   className="h-40"
                   data={globalActivity.daily_data}
+                  valueFormatter={valueFormatterNumbers}
                   index="date"
                   colors={['cyan']}
                   categories={['api_requests']}

@@ -494,10 +506,11 @@ const UsagePage: React.FC<UsagePageProps> = ({

               </Col>
               <Col>
-                <Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Tokens {globalActivity.sum_total_tokens}</Subtitle>
+                <Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Tokens { valueFormatterNumbers(globalActivity.sum_total_tokens)}</Subtitle>
                 <BarChart
                   className="h-40"
                   data={globalActivity.daily_data}
+                  valueFormatter={valueFormatterNumbers}
                   index="date"
                   colors={['cyan']}
                   categories={['total_tokens']}

@@ -517,24 +530,26 @@ const UsagePage: React.FC<UsagePageProps> = ({
               <Title>{globalActivity.model}</Title>
               <Grid numItems={2}>
                 <Col>
-                  <Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>API Requests {globalActivity.sum_api_requests}</Subtitle>
+                  <Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>API Requests {valueFormatterNumbers(globalActivity.sum_api_requests)}</Subtitle>
                   <AreaChart
                     className="h-40"
                     data={globalActivity.daily_data}
                     index="date"
                     colors={['cyan']}
                     categories={['api_requests']}
+                    valueFormatter={valueFormatterNumbers}
                     onValueChange={(v) => console.log(v)}
                   />
                 </Col>
                 <Col>
-                  <Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Tokens {globalActivity.sum_total_tokens}</Subtitle>
+                  <Subtitle style={{ fontSize: "15px", fontWeight: "normal", color: "#535452"}}>Tokens {valueFormatterNumbers(globalActivity.sum_total_tokens)}</Subtitle>
                   <BarChart
                     className="h-40"
                     data={globalActivity.daily_data}
                     index="date"
                     colors={['cyan']}
                     categories={['total_tokens']}
+                    valueFormatter={valueFormatterNumbers}
                     onValueChange={(v) => console.log(v)}
                   />
                 </Col>

@@ -565,7 +580,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
                       color: "#535452",
                     }}
                   >
-                    API Requests {globalActivity.sum_api_requests}
+                    API Requests {valueFormatterNumbers(globalActivity.sum_api_requests)}
                   </Subtitle>
                   <AreaChart
                     className="h-40"

@@ -573,6 +588,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
                     index="date"
                     colors={['cyan']}
                     categories={['api_requests']}
+                    valueFormatter={valueFormatterNumbers}
                     onValueChange={(v) => console.log(v)}
                   />
                 </Col>

@@ -584,13 +600,14 @@ const UsagePage: React.FC<UsagePageProps> = ({
                       color: "#535452",
                     }}
                   >
-                    Tokens {globalActivity.sum_total_tokens}
+                    Tokens {valueFormatterNumbers(globalActivity.sum_total_tokens)}
                   </Subtitle>
                   <BarChart
                     className="h-40"
                     data={globalActivity.daily_data}
                     index="date"
                     colors={['cyan']}
+                    valueFormatter={valueFormatterNumbers}
                     categories={['total_tokens']}
                     onValueChange={(v) => console.log(v)}
                   />
@@ -24,12 +24,22 @@ import {
   Icon,
   TextInput,
 } from "@tremor/react";
-import { userInfoCall } from "./networking";
+
+import {
+  message,
+} from "antd";
+
+import { userInfoCall, userUpdateUserCall, getPossibleUserRoles } from "./networking";
 import { Badge, BadgeDelta, Button } from "@tremor/react";
 import RequestAccess from "./request_model_access";
 import CreateUser from "./create_user_button";
+import EditUserModal from "./edit_user";
 import Paragraph from "antd/es/skeleton/Paragraph";
-import InformationCircleIcon from "@heroicons/react/outline/InformationCircleIcon";
+import {
+  PencilAltIcon,
+  InformationCircleIcon,
+  TrashIcon,
+} from "@heroicons/react/outline";

 interface ViewUserDashboardProps {
   accessToken: string | null;

@@ -55,8 +65,40 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
   const [currentPage, setCurrentPage] = useState(0);
   const [openDialogId, setOpenDialogId] = React.useState<null | number>(null);
   const [selectedItem, setSelectedItem] = useState<null | any>(null);
+  const [editModalVisible, setEditModalVisible] = useState(false);
+  const [selectedUser, setSelectedUser] = useState(null);
+  const [possibleUIRoles, setPossibleUIRoles] = useState<Record<string, Record<string, string>>>({});
   const defaultPageSize = 25;

+  const handleEditCancel = async () => {
+    setSelectedUser(null);
+    setEditModalVisible(false);
+  };
+
+  const handleEditSubmit = async (editedUser: any) => {
+    console.log("inside handleEditSubmit:", editedUser);
+
+    if (!accessToken || !token || !userRole || !userID) {
+      return;
+    }
+
+    try {
+      await userUpdateUserCall(accessToken, editedUser, null);
+      message.success(`User ${editedUser.user_id} updated successfully`);
+    } catch (error) {
+      console.error("There was an error updating the user", error);
+    }
+    if (userData) {
+      const updatedUserData = userData.map((user) =>
+        user.user_id === editedUser.user_id ? editedUser : user
+      );
+      setUserData(updatedUserData);
+    }
+    setSelectedUser(null);
+    setEditModalVisible(false);
+    // Close the modal
+  };
+
   useEffect(() => {
     if (!accessToken || !token || !userRole || !userID) {
       return;

@@ -74,11 +116,16 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
       );
       console.log("user data response:", userDataResponse);
       setUserData(userDataResponse);
+
+      const availableUserRoles = await getPossibleUserRoles(accessToken);
+      setPossibleUIRoles(availableUserRoles);
+
     } catch (error) {
       console.error("There was an error fetching the model data", error);
     }
   };

   if (accessToken && token && userRole && userID) {
     fetchData();
   }

@@ -126,14 +173,10 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({

   return (
     <div style={{ width: "100%" }}>
-      <Grid className="gap-2 p-2 h-[80vh] w-full mt-8">
+      <Grid className="gap-2 p-2 h-[90vh] w-full mt-8">
       <CreateUser userID={userID} accessToken={accessToken} teams={teams} />
-      <Card className="w-full mx-auto flex-auto overflow-y-auto max-h-[80vh] mb-4">
+      <Card className="w-full mx-auto flex-auto overflow-y-auto max-h-[90vh] mb-4">
         <div className="mb-4 mt-1">
-          <Text>
-            These are Users on LiteLLM that created API Keys. Automatically
-            tracked by LiteLLM
-          </Text>
         </div>
         <TabGroup>
           <TabPanels>

@@ -143,25 +186,23 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
                 <TableRow>
                   <TableHeaderCell>User ID</TableHeaderCell>
                   <TableHeaderCell>User Email</TableHeaderCell>
-                  <TableHeaderCell>User Models</TableHeaderCell>
+                  <TableHeaderCell>Role</TableHeaderCell>
                   <TableHeaderCell>User Spend ($ USD)</TableHeaderCell>
                   <TableHeaderCell>User Max Budget ($ USD)</TableHeaderCell>
-                  <TableHeaderCell>User API Key Aliases</TableHeaderCell>
+                  <TableHeaderCell>API Keys</TableHeaderCell>
+                  <TableHeaderCell></TableHeaderCell>
                 </TableRow>
               </TableHead>
               <TableBody>
                 {userData.map((user: any) => (
                   <TableRow key={user.user_id}>
-                    <TableCell>{user.user_id}</TableCell>
-                    <TableCell>{user.user_email}</TableCell>
+                    <TableCell>{user.user_id || "-"}</TableCell>
+                    <TableCell>{user.user_email || "-"}</TableCell>
                     <TableCell>
-                      {user.models && user.models.length > 0
-                        ? user.models
-                        : "All Models"}
+                      {possibleUIRoles?.[user?.user_role]?.ui_label || "-"}
                     </TableCell>
                     <TableCell>
-                      {user.spend ? user.spend?.toFixed(2) : 0}
+                      {user.spend ? user.spend?.toFixed(2) : "-"}
                     </TableCell>
                     <TableCell>
                       {user.max_budget ? user.max_budget : "Unlimited"}

@@ -173,9 +214,13 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
                           (key: any) => key !== null
                         ).length > 0 ? (
                           <Badge size={"xs"} color={"indigo"}>
-                            {user.key_aliases
-                              .filter((key: any) => key !== null)
-                              .join(", ")}
+                            {
+                              user.key_aliases.filter(
+                                (key: any) => key !== null
+                              ).length
+                            }
+                            Keys
                           </Badge>
                         ) : (
                           <Badge size={"xs"} color={"gray"}>

@@ -188,12 +233,23 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
                           </Badge>
                         )}
                         {/* <Text>{user.key_aliases.filter(key => key !== null).length} Keys</Text> */}
-                        {/* <Icon icon={InformationCircleIcon} onClick= {() => {
+                      </Grid>
+                    </TableCell>
+                    <TableCell>
+                      <Icon icon={PencilAltIcon} onClick= {() => {
+                        setSelectedUser(user)
+                        setEditModalVisible(true)
+                      }}>View Keys</Icon>
+                      {/*
+                      <Icon icon={TrashIcon} onClick= {() => {
                         setOpenDialogId(user.user_id)
                         setSelectedItem(user)
                       }}>View Keys</Icon> */}
-                      </Grid>
                     </TableCell>
+
                   </TableRow>
                 ))}
               </TableBody>

@@ -226,30 +282,16 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
               </TabPanel>
             </TabPanels>
           </TabGroup>
+          <EditUserModal
+            visible={editModalVisible}
+            possibleUIRoles={possibleUIRoles}
+            onCancel={handleEditCancel}
+            user={selectedUser}
+            onSubmit={handleEditSubmit}
+          />
         </Card>
         {renderPagination()}
       </Grid>
-      {/* <Dialog
-        open={openDialogId !== null}
-        onClose={() => {
-          setOpenDialogId(null);
-        }}
-      >
-        <DialogPanel>
-          <div className="grid grid-cols-1 gap-6 sm:grid-cols-2 lg:grid-cols-3">
-            <Title>Key Aliases</Title>
-
-            <Text>
-              {selectedItem && selectedItem.key_aliases
-                ? selectedItem.key_aliases.filter(key => key !== null).length > 0
-                  ? selectedItem.key_aliases.filter(key => key !== null).join(', ')
-                  : 'No Keys'
-                : "No Keys"}
-            </Text>
-          </div>
-        </DialogPanel>
-      </Dialog> */}
     </div>
   );
 };