Merge branch 'main' into litellm_redis_cache_usage

Krish Dholakia 2024-06-13 22:07:21 -07:00 committed by GitHub
commit c373f104cc
131 changed files with 3117 additions and 476 deletions

View file

@ -202,6 +202,7 @@ jobs:
-e REDIS_PORT=$REDIS_PORT \ -e REDIS_PORT=$REDIS_PORT \
-e AZURE_FRANCE_API_KEY=$AZURE_FRANCE_API_KEY \ -e AZURE_FRANCE_API_KEY=$AZURE_FRANCE_API_KEY \
-e AZURE_EUROPE_API_KEY=$AZURE_EUROPE_API_KEY \ -e AZURE_EUROPE_API_KEY=$AZURE_EUROPE_API_KEY \
-e MISTRAL_API_KEY=$MISTRAL_API_KEY \
-e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-e AWS_REGION_NAME=$AWS_REGION_NAME \ -e AWS_REGION_NAME=$AWS_REGION_NAME \

View file

@ -4,6 +4,7 @@ import Image from '@theme/IdealImage';
See the raw request/response sent by LiteLLM in your logging provider (OTEL/Langfuse/etc.). See the raw request/response sent by LiteLLM in your logging provider (OTEL/Langfuse/etc.).
**on SDK**
```python ```python
# pip install langfuse # pip install langfuse
import litellm import litellm
@ -33,6 +34,13 @@ response = litellm.completion(
) )
``` ```
**on Proxy**
```yaml
litellm_settings:
  log_raw_request_response: True
```
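With `log_raw_request_response: True` set, any call routed through the proxy will include the raw request/response in your logging provider. A minimal sketch, assuming the proxy runs on `http://0.0.0.0:4000` with a virtual key `sk-1234` and a model named `gpt-3.5-turbo` configured (adjust these to your deployment):
```python
import openai

# Hypothetical values - point this at your own proxy URL, virtual key, and model name
client = openai.OpenAI(
    api_key="sk-1234",
    base_url="http://0.0.0.0:4000"
)

# The raw request/response for this call is forwarded to your logging provider (OTEL/Langfuse/etc.)
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
)
print(response)
```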
**Expected Log** **Expected Log**
<Image img={require('../../img/raw_request_log.png')}/> <Image img={require('../../img/raw_request_log.png')}/>

View file

@ -68,6 +68,7 @@ response = litellm.completion(
| Model Name | Function Call | | Model Name | Function Call |
|------------------|----------------------------------------| |------------------|----------------------------------------|
| gpt-4o | `completion('azure/<your deployment name>', messages)` |
| gpt-4 | `completion('azure/<your deployment name>', messages)` | | gpt-4 | `completion('azure/<your deployment name>', messages)` |
| gpt-4-0314 | `completion('azure/<your deployment name>', messages)` | | gpt-4-0314 | `completion('azure/<your deployment name>', messages)` |
| gpt-4-0613 | `completion('azure/<your deployment name>', messages)` | | gpt-4-0613 | `completion('azure/<your deployment name>', messages)` |
@ -85,7 +86,8 @@ response = litellm.completion(
## Azure OpenAI Vision Models ## Azure OpenAI Vision Models
| Model Name | Function Call | | Model Name | Function Call |
|-----------------------|-----------------------------------------------------------------| |-----------------------|-----------------------------------------------------------------|
| gpt-4-vision | `response = completion(model="azure/<your deployment name>", messages=messages)` | | gpt-4-vision | `completion(model="azure/<your deployment name>", messages=messages)` |
| gpt-4o | `completion('azure/<your deployment name>', messages)` |
#### Usage #### Usage
```python ```python

View file

@ -144,16 +144,135 @@ print(response)
</TabItem> </TabItem>
</Tabs> </Tabs>
## Set temperature, top p, etc.
<Tabs>
<TabItem value="sdk" label="SDK">
```python
import os
from litellm import completion
os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""
response = completion(
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
messages=[{ "content": "Hello, how are you?","role": "user"}],
temperature=0.7,
top_p=1
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
**Set on yaml**
```yaml
model_list:
  - model_name: bedrock-claude-v1
    litellm_params:
      model: bedrock/anthropic.claude-instant-v1
      temperature: <your-temp>
      top_p: <your-top-p>
```
**Set on request**
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="bedrock-claude-v1", messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
temperature=0.7,
top_p=1
)
print(response)
```
</TabItem>
</Tabs>
## Pass provider-specific params
If you pass a non-openai param to litellm, we'll assume it's provider-specific and send it as a kwarg in the request body. [See more](../completion/input.md#provider-specific-params)
<Tabs>
<TabItem value="sdk" label="SDK">
```python
import os
from litellm import completion
os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""
response = completion(
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
messages=[{ "content": "Hello, how are you?","role": "user"}],
top_k=1 # 👈 PROVIDER-SPECIFIC PARAM
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
**Set on yaml**
```yaml
model_list:
  - model_name: bedrock-claude-v1
    litellm_params:
      model: bedrock/anthropic.claude-instant-v1
      top_k: 1 # 👈 PROVIDER-SPECIFIC PARAM
```
**Set on request**
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="bedrock-claude-v1", messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
temperature=0.7,
extra_body={
"top_k": 1 # 👈 PROVIDER-SPECIFIC PARAM
}
)
print(response)
```
</TabItem>
</Tabs>
## Usage - Function Calling ## Usage - Function Calling
:::info LiteLLM uses Bedrock's Converse API for making tool calls
Claude returns its output as an XML Tree. [Here is how we translate it](https://github.com/BerriAI/litellm/blob/49642a5b00a53b1babc1a753426a8afcac85dbbe/litellm/llms/prompt_templates/factory.py#L734).
You can see the raw response via `response._hidden_params["original_response"]`.
Claude hallucinates, e.g. returning the list param `value` as `<value>\n<item>apple</item>\n<item>banana</item>\n</value>` or `<value>\n<list>\n<item>apple</item>\n<item>banana</item>\n</list>\n</value>`.
:::
```python ```python
from litellm import completion from litellm import completion

View file

@ -1,10 +1,13 @@
# Clarifai # Clarifai
Anthropic, OpenAI, Mistral, Llama and Gemini LLMs are Supported on Clarifai. Anthropic, OpenAI, Mistral, Llama and Gemini LLMs are Supported on Clarifai.
:::warning
Streaming is not yet supported when using Clarifai with LiteLLM. Tracking support here: https://github.com/BerriAI/litellm/issues/4162
:::
## Pre-Requisites ## Pre-Requisites
`pip install clarifai`
`pip install litellm` `pip install litellm`
## Required Environment Variables ## Required Environment Variables
@ -12,6 +15,7 @@ To obtain your Clarifai Personal access token follow this [link](https://docs.cl
```python ```python
os.environ["CLARIFAI_API_KEY"] = "YOUR_CLARIFAI_PAT" # CLARIFAI_PAT os.environ["CLARIFAI_API_KEY"] = "YOUR_CLARIFAI_PAT" # CLARIFAI_PAT
``` ```
## Usage ## Usage
@ -68,7 +72,7 @@ Example Usage - Note: liteLLM supports all models deployed on Clarifai
| clarifai/meta.Llama-2.codeLlama-70b-Python | `completion('clarifai/meta.Llama-2.codeLlama-70b-Python', messages)`| | clarifai/meta.Llama-2.codeLlama-70b-Python | `completion('clarifai/meta.Llama-2.codeLlama-70b-Python', messages)`|
| clarifai/meta.Llama-2.codeLlama-70b-Instruct | `completion('clarifai/meta.Llama-2.codeLlama-70b-Instruct', messages)` | | clarifai/meta.Llama-2.codeLlama-70b-Instruct | `completion('clarifai/meta.Llama-2.codeLlama-70b-Instruct', messages)` |
## Mistal LLMs ## Mistral LLMs
| Model Name | Function Call | | Model Name | Function Call |
|---------------------------------------------|------------------------------------------------------------------------| |---------------------------------------------|------------------------------------------------------------------------|
| clarifai/mistralai.completion.mixtral-8x22B | `completion('clarifai/mistralai.completion.mixtral-8x22B', messages)` | | clarifai/mistralai.completion.mixtral-8x22B | `completion('clarifai/mistralai.completion.mixtral-8x22B', messages)` |

View file

@ -8,6 +8,152 @@ import TabItem from '@theme/TabItem';
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a> </a>
## 🆕 `vertex_ai_beta/` route
New `vertex_ai_beta/` route. Adds support for system messages, the `tool_choice` param, etc. by moving to an httpx client (instead of the Vertex SDK).
```python
from litellm import completion
import json
## GET CREDENTIALS
file_path = 'path/to/vertex_ai_service_account.json'
# Load the JSON file
with open(file_path, 'r') as file:
    vertex_credentials = json.load(file)
# Convert to JSON string
vertex_credentials_json = json.dumps(vertex_credentials)
## COMPLETION CALL
response = completion(
model="vertex_ai_beta/gemini-pro",
messages=[{ "content": "Hello, how are you?","role": "user"}],
vertex_credentials=vertex_credentials_json
)
```
### **System Message**
```python
from litellm import completion
import json
## GET CREDENTIALS
file_path = 'path/to/vertex_ai_service_account.json'
# Load the JSON file
with open(file_path, 'r') as file:
    vertex_credentials = json.load(file)
# Convert to JSON string
vertex_credentials_json = json.dumps(vertex_credentials)
response = completion(
model="vertex_ai_beta/gemini-pro",
messages=[{"content": "You are a good bot.","role": "system"}, {"content": "Hello, how are you?","role": "user"}],
vertex_credentials=vertex_credentials_json
)
```
### **Function Calling**
Force Gemini to make tool calls with `tool_choice="required"`.
```python
from litellm import completion
import json
## GET CREDENTIALS
file_path = 'path/to/vertex_ai_service_account.json'
# Load the JSON file
with open(file_path, 'r') as file:
    vertex_credentials = json.load(file)
# Convert to JSON string
vertex_credentials_json = json.dumps(vertex_credentials)
messages = [
{
"role": "system",
"content": "Your name is Litellm Bot, you are a helpful assistant",
},
# User asks for their name and weather in San Francisco
{
"role": "user",
"content": "Hello, what is your name and can you tell me the weather?",
},
]
tools = [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
}
},
"required": ["location"],
},
},
}
]
data = {
"model": "vertex_ai_beta/gemini-1.5-pro-preview-0514"),
"messages": messages,
"tools": tools,
"tool_choice": "required",
"vertex_credentials": vertex_credentials_json
}
## COMPLETION CALL
print(completion(**data))
```
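The response follows the OpenAI tool-calling format, so the requested call can be read back off the first choice. A short sketch (assuming the call above succeeded and `get_weather` was selected):
```python
import json

response = completion(**data)

# Each tool call carries the function name and a JSON-encoded arguments string
tool_call = response.choices[0].message.tool_calls[0]
print(tool_call.function.name)                   # e.g. "get_weather"
print(json.loads(tool_call.function.arguments))  # e.g. {"location": "San Francisco, CA"}
```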
### **JSON Schema**
```python
from litellm import completion
import json
## GET CREDENTIALS
file_path = 'path/to/vertex_ai_service_account.json'
# Load the JSON file
with open(file_path, 'r') as file:
    vertex_credentials = json.load(file)
# Convert to JSON string
vertex_credentials_json = json.dumps(vertex_credentials)
messages = [
{
"role": "user",
"content": """
List 5 popular cookie recipes.
Using this JSON schema:
Recipe = {"recipe_name": str}
Return a `list[Recipe]`
"""
}
]
completion(model="vertex_ai_beta/gemini-1.5-flash-preview-0514", messages=messages, response_format={ "type": "json_object" })
```
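Because `response_format={ "type": "json_object" }` makes the model return raw JSON text, the message content can be parsed directly. A small sketch of consuming the result:
```python
import json

response = completion(
    model="vertex_ai_beta/gemini-1.5-flash-preview-0514",
    messages=messages,
    response_format={"type": "json_object"},
)

# The message content is a JSON string - parse it into Python objects
recipes = json.loads(response.choices[0].message.content)
print(recipes)
```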
## Pre-requisites ## Pre-requisites
* `pip install google-cloud-aiplatform` (pre-installed on proxy docker image) * `pip install google-cloud-aiplatform` (pre-installed on proxy docker image)
* Authentication: * Authentication:
@ -140,7 +286,7 @@ In certain use-cases you may need to make calls to the models and pass [safety s
```python ```python
response = completion( response = completion(
model="gemini/gemini-pro", model="vertex_ai/gemini-pro",
messages=[{"role": "user", "content": "write code for saying hi from LiteLLM"}] messages=[{"role": "user", "content": "write code for saying hi from LiteLLM"}]
safety_settings=[ safety_settings=[
{ {
@ -363,8 +509,8 @@ response = completion(
## Gemini 1.5 Pro (and Vision) ## Gemini 1.5 Pro (and Vision)
| Model Name | Function Call | | Model Name | Function Call |
|------------------|--------------------------------------| |------------------|--------------------------------------|
| gemini-1.5-pro | `completion('gemini-1.5-pro', messages)`, `completion('vertex_ai/gemini-pro', messages)` | | gemini-1.5-pro | `completion('gemini-1.5-pro', messages)`, `completion('vertex_ai/gemini-1.5-pro', messages)` |
| gemini-1.5-flash-preview-0514 | `completion('gemini-1.5-flash-preview-0514', messages)`, `completion('vertex_ai/gemini-pro', messages)` | | gemini-1.5-flash-preview-0514 | `completion('gemini-1.5-flash-preview-0514', messages)`, `completion('vertex_ai/gemini-1.5-flash-preview-0514', messages)` |
| gemini-1.5-pro-preview-0514 | `completion('gemini-1.5-pro-preview-0514', messages)`, `completion('vertex_ai/gemini-1.5-pro-preview-0514', messages)` | | gemini-1.5-pro-preview-0514 | `completion('gemini-1.5-pro-preview-0514', messages)`, `completion('vertex_ai/gemini-1.5-pro-preview-0514', messages)` |
@ -558,6 +704,29 @@ All models listed [here](https://github.com/BerriAI/litellm/blob/57f37f743886a02
| text-embedding-preview-0409 | `embedding(model="vertex_ai/text-embedding-preview-0409", input)` | | text-embedding-preview-0409 | `embedding(model="vertex_ai/text-embedding-preview-0409", input)` |
| text-multilingual-embedding-preview-0409 | `embedding(model="vertex_ai/text-multilingual-embedding-preview-0409", input)` | | text-multilingual-embedding-preview-0409 | `embedding(model="vertex_ai/text-multilingual-embedding-preview-0409", input)` |
### Advanced: Use `task_type` and `title` (Vertex-Specific Params)
👉 `task_type` and `title` are Vertex-specific params
LiteLLM supported Vertex-specific params:
```python
auto_truncate: Optional[bool] = None
task_type: Optional[Literal["RETRIEVAL_QUERY","RETRIEVAL_DOCUMENT", "SEMANTIC_SIMILARITY", "CLASSIFICATION", "CLUSTERING", "QUESTION_ANSWERING", "FACT_VERIFICATION"]] = None
title: Optional[str] = None # The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
```
**Example Usage with LiteLLM**
```python
response = litellm.embedding(
model="vertex_ai/text-embedding-004",
input=["good morning from litellm", "gm"],
task_type="RETRIEVAL_DOCUMENT",
dimensions=1,
auto_truncate=True,
)
```
## Image Generation Models ## Image Generation Models
Usage Usage
@ -657,6 +826,3 @@ s/o @[Darien Kindlund](https://www.linkedin.com/in/kindlund/) for this tutorial

View file

@ -1,3 +1,5 @@
import Image from '@theme/IdealImage';
# 🚨 Alerting / Webhooks # 🚨 Alerting / Webhooks
Get alerts for: Get alerts for:
@ -15,6 +17,11 @@ Get alerts for:
- **Spend** Weekly & Monthly spend per Team, Tag - **Spend** Weekly & Monthly spend per Team, Tag
Works across:
- [Slack](#quick-start)
- [Discord](#advanced---using-discord-webhooks)
- [Microsoft Teams](#advanced---using-ms-teams-webhooks)
## Quick Start ## Quick Start
Set up a slack alert channel to receive alerts from proxy. Set up a slack alert channel to receive alerts from proxy.
@ -108,6 +115,48 @@ AlertType = Literal[
``` ```
## Advanced - Using MS Teams Webhooks
MS Teams provides a Slack-compatible webhook URL that you can use for alerting.
##### Quick Start
1. [Get a webhook url](https://learn.microsoft.com/en-us/microsoftteams/platform/webhooks-and-connectors/how-to/add-incoming-webhook?tabs=newteams%2Cdotnet#create-an-incoming-webhook) for your Microsoft Teams channel
2. Add it to your .env
```bash
SLACK_WEBHOOK_URL="https://berriai.webhook.office.com/webhookb2/...6901/IncomingWebhook/b55fa0c2a48647be8e6effedcd540266/e04b1092-4a3e-44a2-ab6b-29a0a4854d1d"
```
3. Add it to your litellm config
```yaml
model_list:
  - model_name: "azure-model"
    litellm_params:
      model: "azure/gpt-35-turbo"
      api_key: "my-bad-key" # 👈 bad key

general_settings:
  alerting: ["slack"]
  alerting_threshold: 300 # sends alerts if requests hang for 5min+ and responses take 5min+
```
4. Run health check!
Call the proxy `/health/services` endpoint to test if your alerting connection is correctly set up.
```bash
curl --location 'http://0.0.0.0:4000/health/services?service=slack' \
--header 'Authorization: Bearer sk-1234'
```
**Expected Response**
<Image img={require('../../img/ms_teams_alerting.png')}/>
## Advanced - Using Discord Webhooks ## Advanced - Using Discord Webhooks
Discord provides a slack compatible webhook url that you can use for alerting Discord provides a slack compatible webhook url that you can use for alerting
@ -139,7 +188,6 @@ environment_variables:
SLACK_WEBHOOK_URL: "https://discord.com/api/webhooks/1240030362193760286/cTLWt5ATn1gKmcy_982rl5xmYHsrM1IWJdmCL1AyOmU9JdQXazrp8L1_PYgUtgxj8x4f/slack" SLACK_WEBHOOK_URL: "https://discord.com/api/webhooks/1240030362193760286/cTLWt5ATn1gKmcy_982rl5xmYHsrM1IWJdmCL1AyOmU9JdQXazrp8L1_PYgUtgxj8x4f/slack"
``` ```
That's it ! You're ready to go !
## Advanced - [BETA] Webhooks for Budget Alerts ## Advanced - [BETA] Webhooks for Budget Alerts

View file

@ -1,5 +1,6 @@
import Tabs from '@theme/Tabs'; import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem'; import TabItem from '@theme/TabItem';
import Image from '@theme/IdealImage';
# 🐳 Docker, Deploying LiteLLM Proxy # 🐳 Docker, Deploying LiteLLM Proxy
@ -537,7 +538,9 @@ ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml
## Advanced Deployment Settings ## Advanced Deployment Settings
### Customization of the server root path ### 1. Customization of the server root path (custom Proxy base url)
💥 Use this when you want to serve LiteLLM on a custom base url path like `https://localhost:4000/api/v1`
:::info :::info
@ -548,9 +551,29 @@ In a Kubernetes deployment, it's possible to utilize a shared DNS to host multip
Customize the root path to eliminate the need for employing multiple DNS configurations during deployment. Customize the root path to eliminate the need for employing multiple DNS configurations during deployment.
👉 Set `SERVER_ROOT_PATH` in your .env and this will be set as your server root path 👉 Set `SERVER_ROOT_PATH` in your .env and this will be set as your server root path
```
export SERVER_ROOT_PATH="/api/v1"
```
**Step 1. Run Proxy with `SERVER_ROOT_PATH` set in your env**
### Setting SSL Certification ```shell
docker run --name litellm-proxy \
-e DATABASE_URL=postgresql://<user>:<password>@<host>:<port>/<dbname> \
-e SERVER_ROOT_PATH="/api/v1" \
-p 4000:4000 \
ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml
```
After running the proxy you can access it on `http://0.0.0.0:4000/api/v1/` (since we set `SERVER_ROOT_PATH="/api/v1"`)
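For example, an OpenAI-compatible client would now point at the new root path. A sketch, assuming a virtual key `sk-1234` and a model named `gpt-3.5-turbo` configured on the proxy:
```python
import openai

# base_url now includes the custom root path set via SERVER_ROOT_PATH
client = openai.OpenAI(
    api_key="sk-1234",                      # hypothetical virtual key
    base_url="http://0.0.0.0:4000/api/v1"
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",                  # hypothetical model name on the proxy
    messages=[{"role": "user", "content": "Hello from the custom root path"}],
)
print(response)
```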
**Step 2. Verify Running on correct path**
<Image img={require('../../img/custom_root_path.png')} />
**That's it**, that's all you need to run the proxy on a custom root path
### 2. Setting SSL Certification
Use this, If you need to set ssl certificates for your on prem litellm proxy Use this, If you need to set ssl certificates for your on prem litellm proxy

View file

@ -1,4 +1,4 @@
# Grafana, Prometheus metrics [BETA] # 📈 Prometheus metrics [BETA]
LiteLLM Exposes a `/metrics` endpoint for Prometheus to Poll LiteLLM Exposes a `/metrics` endpoint for Prometheus to Poll
@ -54,6 +54,13 @@ http://localhost:4000/metrics
| `litellm_total_tokens` | input + output tokens per `"user", "key", "model", "team", "end-user"` | | `litellm_total_tokens` | input + output tokens per `"user", "key", "model", "team", "end-user"` |
| `litellm_llm_api_failed_requests_metric` | Number of failed LLM API requests per `"user", "key", "model", "team", "end-user"` | | `litellm_llm_api_failed_requests_metric` | Number of failed LLM API requests per `"user", "key", "model", "team", "end-user"` |
### Budget Metrics
| Metric Name | Description |
|----------------------|--------------------------------------|
| `litellm_remaining_team_budget_metric` | Remaining Budget for Team (A team created on LiteLLM) |
| `litellm_remaining_api_key_budget_metric` | Remaining Budget for API Key (A key Created on LiteLLM)|
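To confirm the budget gauges are being emitted, you can fetch the `/metrics` endpoint directly and filter for them. A quick sketch, assuming the proxy runs locally on port 4000 (adjust to your deployment):
```python
import requests

# Hypothetical local proxy address - adjust to your deployment
metrics_text = requests.get("http://localhost:4000/metrics").text

# Print only the budget-related gauges
for line in metrics_text.splitlines():
    if "litellm_remaining_team_budget_metric" in line or "litellm_remaining_api_key_budget_metric" in line:
        print(line)
```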
## Monitor System Health ## Monitor System Health
To monitor the health of litellm adjacent services (redis / postgres), do: To monitor the health of litellm adjacent services (redis / postgres), do:

View file

@ -124,3 +124,17 @@ LiteLLM Enterprise: Enable [SSO login](./ui.md#setup-ssoauth-for-ui)
<Image img={require('../../img/ui_self_serve_create_key.png')} style={{ width: '800px', height: 'auto' }} /> <Image img={require('../../img/ui_self_serve_create_key.png')} style={{ width: '800px', height: 'auto' }} />
## Advanced
### Setting custom logout URLs
Set `PROXY_LOGOUT_URL` in your .env if you want users to get redirected to a specific URL when they click logout
```
export PROXY_LOGOUT_URL="https://www.google.com"
```
<Image img={require('../../img/ui_logout.png')} style={{ width: '400px', height: 'auto' }} />

View file

@ -0,0 +1,123 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# 💰 Setting Team Budgets
Track spend and set budgets for your internal teams
## Setting Monthly Team Budgets
### 1. Create a team
- Set `max_budget=0.000000001` (the $ value the team is allowed to spend)
- Set `budget_duration="1d"` (how frequently the budget should reset)
Create a new team and set `max_budget` and `budget_duration`
```shell
curl -X POST 'http://0.0.0.0:4000/team/new' \
-H 'Authorization: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{
"team_alias": "QA Prod Bot",
"max_budget": 0.000000001,
"budget_duration": "1d"
}'
```
Response
```shell
{
"team_alias": "QA Prod Bot",
"team_id": "de35b29e-6ca8-4f47-b804-2b79d07aa99a",
"max_budget": 0.0001,
"budget_duration": "1d",
"budget_reset_at": "2024-06-14T22:48:36.594000Z"
}
```
Possible values for `budget_duration`
| `budget_duration` | When Budget will reset |
| --- | --- |
| `budget_duration="1s"` | every 1 second |
| `budget_duration="1m"` | every 1 min |
| `budget_duration="1h"` | every 1 hour |
| `budget_duration="1d"` | every 1 day |
| `budget_duration="1mo"` | start of every month |
### 2. Create a key for the `team`
Create a key for `team_id="de35b29e-6ca8-4f47-b804-2b79d07aa99a"` from Step 1
💡 **The budget for Team="QA Prod Bot" will apply to this key**
```shell
curl -X POST 'http://0.0.0.0:4000/key/generate' \
-H 'Authorization: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{"team_id": "de35b29e-6ca8-4f47-b804-2b79d07aa99a"}'
```
Response
```shell
{"team_id":"de35b29e-6ca8-4f47-b804-2b79d07aa99a", "key":"sk-5qtncoYjzRcxMM4bDRktNQ"}
```
### 3. Test It
Use the key from step 2 and run this request twice
```shell
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Authorization: Bearer sk-5qtncoYjzRcxMM4bDRktNQ' \
-H 'Content-Type: application/json' \
-d ' {
"model": "llama3",
"messages": [
{
"role": "user",
"content": "hi"
}
]
}'
```
On the 2nd response - expect to see the following exception
```shell
{
"error": {
"message": "Budget has been exceeded! Current cost: 3.5e-06, Max budget: 1e-09",
"type": "auth_error",
"param": null,
"code": 400
}
}
```
## Advanced
### Prometheus metrics for `remaining_budget`
[More info about Prometheus metrics here](https://docs.litellm.ai/docs/proxy/prometheus)
You'll need the following in your proxy config.yaml
```yaml
litellm_settings:
  success_callback: ["prometheus"]
  failure_callback: ["prometheus"]
```
Expect to see this metric on prometheus to track the Remaining Budget for the team
```shell
litellm_remaining_team_budget_metric{team_alias="QA Prod Bot",team_id="de35b29e-6ca8-4f47-b804-2b79d07aa99a"} 9.699999999999992e-06
```

Binary file not shown.

After

Width:  |  Height:  |  Size: 151 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 241 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

View file

@ -44,6 +44,7 @@ const sidebars = {
"proxy/self_serve", "proxy/self_serve",
"proxy/users", "proxy/users",
"proxy/customers", "proxy/customers",
"proxy/team_budgets",
"proxy/billing", "proxy/billing",
"proxy/user_keys", "proxy/user_keys",
"proxy/virtual_keys", "proxy/virtual_keys",
@ -54,6 +55,7 @@ const sidebars = {
items: ["proxy/logging", "proxy/streaming_logging"], items: ["proxy/logging", "proxy/streaming_logging"],
}, },
"proxy/ui", "proxy/ui",
"proxy/prometheus",
"proxy/email", "proxy/email",
"proxy/multiple_admins", "proxy/multiple_admins",
"proxy/team_based_routing", "proxy/team_based_routing",
@ -70,7 +72,6 @@ const sidebars = {
"proxy/pii_masking", "proxy/pii_masking",
"proxy/prompt_injection", "proxy/prompt_injection",
"proxy/caching", "proxy/caching",
"proxy/prometheus",
"proxy/call_hooks", "proxy/call_hooks",
"proxy/rules", "proxy/rules",
"proxy/cli", "proxy/cli",

View file

@ -93,7 +93,7 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
response.choices[0], litellm.utils.Choices response.choices[0], litellm.utils.Choices
): ):
for word in self.banned_keywords_list: for word in self.banned_keywords_list:
self.test_violation(test_str=response.choices[0].message.content) self.test_violation(test_str=response.choices[0].message.content or "")
async def async_post_call_streaming_hook( async def async_post_call_streaming_hook(
self, self,

View file

@ -73,7 +73,7 @@ token: Optional[str] = (
) )
telemetry = True telemetry = True
max_tokens = 256 # OpenAI Defaults max_tokens = 256 # OpenAI Defaults
drop_params = False drop_params = bool(os.getenv("LITELLM_DROP_PARAMS", False))
modify_params = False modify_params = False
retry = True retry = True
### AUTH ### ### AUTH ###
@ -605,6 +605,7 @@ provider_list: List = [
"together_ai", "together_ai",
"openrouter", "openrouter",
"vertex_ai", "vertex_ai",
"vertex_ai_beta",
"palm", "palm",
"gemini", "gemini",
"ai21", "ai21",
@ -765,7 +766,8 @@ from .llms.gemini import GeminiConfig
from .llms.nlp_cloud import NLPCloudConfig from .llms.nlp_cloud import NLPCloudConfig
from .llms.aleph_alpha import AlephAlphaConfig from .llms.aleph_alpha import AlephAlphaConfig
from .llms.petals import PetalsConfig from .llms.petals import PetalsConfig
from .llms.vertex_ai import VertexAIConfig from .llms.vertex_httpx import VertexGeminiConfig
from .llms.vertex_ai import VertexAIConfig, VertexAITextEmbeddingConfig
from .llms.vertex_ai_anthropic import VertexAIAnthropicConfig from .llms.vertex_ai_anthropic import VertexAIAnthropicConfig
from .llms.sagemaker import SagemakerConfig from .llms.sagemaker import SagemakerConfig
from .llms.ollama import OllamaConfig from .llms.ollama import OllamaConfig
@ -787,7 +789,9 @@ from .llms.openai import (
OpenAIConfig, OpenAIConfig,
OpenAITextCompletionConfig, OpenAITextCompletionConfig,
MistralConfig, MistralConfig,
MistralEmbeddingConfig,
DeepInfraConfig, DeepInfraConfig,
AzureAIStudioConfig,
) )
from .llms.azure import ( from .llms.azure import (
AzureOpenAIConfig, AzureOpenAIConfig,

View file

@ -1192,7 +1192,7 @@ class S3Cache(BaseCache):
return cached_response return cached_response
except botocore.exceptions.ClientError as e: except botocore.exceptions.ClientError as e:
if e.response["Error"]["Code"] == "NoSuchKey": if e.response["Error"]["Code"] == "NoSuchKey":
verbose_logger.error( verbose_logger.debug(
f"S3 Cache: The specified key '{key}' does not exist in the S3 bucket." f"S3 Cache: The specified key '{key}' does not exist in the S3 bucket."
) )
return None return None

View file

@ -26,7 +26,7 @@ class AuthenticationError(openai.AuthenticationError): # type: ignore
num_retries: Optional[int] = None, num_retries: Optional[int] = None,
): ):
self.status_code = 401 self.status_code = 401
self.message = message self.message = "litellm.AuthenticationError: {}".format(message)
self.llm_provider = llm_provider self.llm_provider = llm_provider
self.model = model self.model = model
self.litellm_debug_info = litellm_debug_info self.litellm_debug_info = litellm_debug_info
@ -72,7 +72,7 @@ class NotFoundError(openai.NotFoundError): # type: ignore
num_retries: Optional[int] = None, num_retries: Optional[int] = None,
): ):
self.status_code = 404 self.status_code = 404
self.message = message self.message = "litellm.NotFoundError: {}".format(message)
self.model = model self.model = model
self.llm_provider = llm_provider self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info self.litellm_debug_info = litellm_debug_info
@ -117,7 +117,7 @@ class BadRequestError(openai.BadRequestError): # type: ignore
num_retries: Optional[int] = None, num_retries: Optional[int] = None,
): ):
self.status_code = 400 self.status_code = 400
self.message = message self.message = "litellm.BadRequestError: {}".format(message)
self.model = model self.model = model
self.llm_provider = llm_provider self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info self.litellm_debug_info = litellm_debug_info
@ -162,7 +162,7 @@ class UnprocessableEntityError(openai.UnprocessableEntityError): # type: ignore
num_retries: Optional[int] = None, num_retries: Optional[int] = None,
): ):
self.status_code = 422 self.status_code = 422
self.message = message self.message = "litellm.UnprocessableEntityError: {}".format(message)
self.model = model self.model = model
self.llm_provider = llm_provider self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info self.litellm_debug_info = litellm_debug_info
@ -204,7 +204,7 @@ class Timeout(openai.APITimeoutError): # type: ignore
request=request request=request
) # Call the base class constructor with the parameters it needs ) # Call the base class constructor with the parameters it needs
self.status_code = 408 self.status_code = 408
self.message = message self.message = "litellm.Timeout: {}".format(message)
self.model = model self.model = model
self.llm_provider = llm_provider self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info self.litellm_debug_info = litellm_debug_info
@ -241,7 +241,7 @@ class PermissionDeniedError(openai.PermissionDeniedError): # type:ignore
num_retries: Optional[int] = None, num_retries: Optional[int] = None,
): ):
self.status_code = 403 self.status_code = 403
self.message = message self.message = "litellm.PermissionDeniedError: {}".format(message)
self.llm_provider = llm_provider self.llm_provider = llm_provider
self.model = model self.model = model
self.litellm_debug_info = litellm_debug_info self.litellm_debug_info = litellm_debug_info
@ -280,7 +280,7 @@ class RateLimitError(openai.RateLimitError): # type: ignore
num_retries: Optional[int] = None, num_retries: Optional[int] = None,
): ):
self.status_code = 429 self.status_code = 429
self.message = message self.message = "litellm.RateLimitError: {}".format(message)
self.llm_provider = llm_provider self.llm_provider = llm_provider
self.model = model self.model = model
self.litellm_debug_info = litellm_debug_info self.litellm_debug_info = litellm_debug_info
@ -328,7 +328,7 @@ class ContextWindowExceededError(BadRequestError): # type: ignore
litellm_debug_info: Optional[str] = None, litellm_debug_info: Optional[str] = None,
): ):
self.status_code = 400 self.status_code = 400
self.message = message self.message = "litellm.ContextWindowExceededError: {}".format(message)
self.model = model self.model = model
self.llm_provider = llm_provider self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info self.litellm_debug_info = litellm_debug_info
@ -368,7 +368,7 @@ class RejectedRequestError(BadRequestError): # type: ignore
litellm_debug_info: Optional[str] = None, litellm_debug_info: Optional[str] = None,
): ):
self.status_code = 400 self.status_code = 400
self.message = message self.message = "litellm.RejectedRequestError: {}".format(message)
self.model = model self.model = model
self.llm_provider = llm_provider self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info self.litellm_debug_info = litellm_debug_info
@ -411,7 +411,7 @@ class ContentPolicyViolationError(BadRequestError): # type: ignore
litellm_debug_info: Optional[str] = None, litellm_debug_info: Optional[str] = None,
): ):
self.status_code = 400 self.status_code = 400
self.message = message self.message = "litellm.ContentPolicyViolationError: {}".format(message)
self.model = model self.model = model
self.llm_provider = llm_provider self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info self.litellm_debug_info = litellm_debug_info
@ -452,7 +452,7 @@ class ServiceUnavailableError(openai.APIStatusError): # type: ignore
num_retries: Optional[int] = None, num_retries: Optional[int] = None,
): ):
self.status_code = 503 self.status_code = 503
self.message = message self.message = "litellm.ServiceUnavailableError: {}".format(message)
self.llm_provider = llm_provider self.llm_provider = llm_provider
self.model = model self.model = model
self.litellm_debug_info = litellm_debug_info self.litellm_debug_info = litellm_debug_info
@ -501,7 +501,7 @@ class InternalServerError(openai.InternalServerError): # type: ignore
num_retries: Optional[int] = None, num_retries: Optional[int] = None,
): ):
self.status_code = 500 self.status_code = 500
self.message = message self.message = "litellm.InternalServerError: {}".format(message)
self.llm_provider = llm_provider self.llm_provider = llm_provider
self.model = model self.model = model
self.litellm_debug_info = litellm_debug_info self.litellm_debug_info = litellm_debug_info
@ -552,7 +552,7 @@ class APIError(openai.APIError): # type: ignore
num_retries: Optional[int] = None, num_retries: Optional[int] = None,
): ):
self.status_code = status_code self.status_code = status_code
self.message = message self.message = "litellm.APIError: {}".format(message)
self.llm_provider = llm_provider self.llm_provider = llm_provider
self.model = model self.model = model
self.litellm_debug_info = litellm_debug_info self.litellm_debug_info = litellm_debug_info
@ -589,7 +589,7 @@ class APIConnectionError(openai.APIConnectionError): # type: ignore
max_retries: Optional[int] = None, max_retries: Optional[int] = None,
num_retries: Optional[int] = None, num_retries: Optional[int] = None,
): ):
self.message = message self.message = "litellm.APIConnectionError: {}".format(message)
self.llm_provider = llm_provider self.llm_provider = llm_provider
self.model = model self.model = model
self.status_code = 500 self.status_code = 500
@ -626,7 +626,7 @@ class APIResponseValidationError(openai.APIResponseValidationError): # type: ig
max_retries: Optional[int] = None, max_retries: Optional[int] = None,
num_retries: Optional[int] = None, num_retries: Optional[int] = None,
): ):
self.message = message self.message = "litellm.APIResponseValidationError: {}".format(message)
self.llm_provider = llm_provider self.llm_provider = llm_provider
self.model = model self.model = model
request = httpx.Request(method="POST", url="https://api.openai.com/v1") request = httpx.Request(method="POST", url="https://api.openai.com/v1")

View file

@ -8,6 +8,7 @@ import traceback
import datetime, subprocess, sys import datetime, subprocess, sys
import litellm, uuid import litellm, uuid
from litellm._logging import print_verbose, verbose_logger from litellm._logging import print_verbose, verbose_logger
from typing import Optional, Union
class PrometheusLogger: class PrometheusLogger:
@ -17,33 +18,76 @@ class PrometheusLogger:
**kwargs, **kwargs,
): ):
try: try:
from prometheus_client import Counter from prometheus_client import Counter, Gauge
self.litellm_llm_api_failed_requests_metric = Counter( self.litellm_llm_api_failed_requests_metric = Counter(
name="litellm_llm_api_failed_requests_metric", name="litellm_llm_api_failed_requests_metric",
documentation="Total number of failed LLM API calls via litellm", documentation="Total number of failed LLM API calls via litellm",
labelnames=["end_user", "hashed_api_key", "model", "team", "user"], labelnames=[
"end_user",
"hashed_api_key",
"model",
"team",
"team_alias",
"user",
],
) )
self.litellm_requests_metric = Counter( self.litellm_requests_metric = Counter(
name="litellm_requests_metric", name="litellm_requests_metric",
documentation="Total number of LLM calls to litellm", documentation="Total number of LLM calls to litellm",
labelnames=["end_user", "hashed_api_key", "model", "team", "user"], labelnames=[
"end_user",
"hashed_api_key",
"model",
"team",
"team_alias",
"user",
],
) )
# Counter for spend # Counter for spend
self.litellm_spend_metric = Counter( self.litellm_spend_metric = Counter(
"litellm_spend_metric", "litellm_spend_metric",
"Total spend on LLM requests", "Total spend on LLM requests",
labelnames=["end_user", "hashed_api_key", "model", "team", "user"], labelnames=[
"end_user",
"hashed_api_key",
"model",
"team",
"team_alias",
"user",
],
) )
# Counter for total_output_tokens # Counter for total_output_tokens
self.litellm_tokens_metric = Counter( self.litellm_tokens_metric = Counter(
"litellm_total_tokens", "litellm_total_tokens",
"Total number of input + output tokens from LLM requests", "Total number of input + output tokens from LLM requests",
labelnames=["end_user", "hashed_api_key", "model", "team", "user"], labelnames=[
"end_user",
"hashed_api_key",
"model",
"team",
"team_alias",
"user",
],
) )
# Remaining Budget for Team
self.litellm_remaining_team_budget_metric = Gauge(
"litellm_remaining_team_budget_metric",
"Remaining budget for team",
labelnames=["team_id", "team_alias"],
)
# Remaining Budget for API Key
self.litellm_remaining_api_key_budget_metric = Gauge(
"litellm_remaining_api_key_budget_metric",
"Remaining budget for api key",
labelnames=["hashed_api_key", "api_key_alias"],
)
except Exception as e: except Exception as e:
print_verbose(f"Got exception on init prometheus client {str(e)}") print_verbose(f"Got exception on init prometheus client {str(e)}")
raise e raise e
@ -51,7 +95,9 @@ class PrometheusLogger:
async def _async_log_event( async def _async_log_event(
self, kwargs, response_obj, start_time, end_time, print_verbose, user_id self, kwargs, response_obj, start_time, end_time, print_verbose, user_id
): ):
self.log_event(kwargs, response_obj, start_time, end_time, print_verbose) self.log_event(
kwargs, response_obj, start_time, end_time, user_id, print_verbose
)
def log_event( def log_event(
self, kwargs, response_obj, start_time, end_time, user_id, print_verbose self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
@ -72,9 +118,36 @@ class PrometheusLogger:
"user_api_key_user_id", None "user_api_key_user_id", None
) )
user_api_key = litellm_params.get("metadata", {}).get("user_api_key", None) user_api_key = litellm_params.get("metadata", {}).get("user_api_key", None)
user_api_key_alias = litellm_params.get("metadata", {}).get(
"user_api_key_alias", None
)
user_api_team = litellm_params.get("metadata", {}).get( user_api_team = litellm_params.get("metadata", {}).get(
"user_api_key_team_id", None "user_api_key_team_id", None
) )
user_api_team_alias = litellm_params.get("metadata", {}).get(
"user_api_key_team_alias", None
)
_team_spend = litellm_params.get("metadata", {}).get(
"user_api_key_team_spend", None
)
_team_max_budget = litellm_params.get("metadata", {}).get(
"user_api_key_team_max_budget", None
)
_remaining_team_budget = safe_get_remaining_budget(
max_budget=_team_max_budget, spend=_team_spend
)
_api_key_spend = litellm_params.get("metadata", {}).get(
"user_api_key_spend", None
)
_api_key_max_budget = litellm_params.get("metadata", {}).get(
"user_api_key_max_budget", None
)
_remaining_api_key_budget = safe_get_remaining_budget(
max_budget=_api_key_max_budget, spend=_api_key_spend
)
if response_obj is not None: if response_obj is not None:
tokens_used = response_obj.get("usage", {}).get("total_tokens", 0) tokens_used = response_obj.get("usage", {}).get("total_tokens", 0)
else: else:
@ -94,19 +167,47 @@ class PrometheusLogger:
user_api_key = hash_token(user_api_key) user_api_key = hash_token(user_api_key)
self.litellm_requests_metric.labels( self.litellm_requests_metric.labels(
end_user_id, user_api_key, model, user_api_team, user_id end_user_id,
user_api_key,
model,
user_api_team,
user_api_team_alias,
user_id,
).inc() ).inc()
self.litellm_spend_metric.labels( self.litellm_spend_metric.labels(
end_user_id, user_api_key, model, user_api_team, user_id end_user_id,
user_api_key,
model,
user_api_team,
user_api_team_alias,
user_id,
).inc(response_cost) ).inc(response_cost)
self.litellm_tokens_metric.labels( self.litellm_tokens_metric.labels(
end_user_id, user_api_key, model, user_api_team, user_id end_user_id,
user_api_key,
model,
user_api_team,
user_api_team_alias,
user_id,
).inc(tokens_used) ).inc(tokens_used)
self.litellm_remaining_team_budget_metric.labels(
user_api_team, user_api_team_alias
).set(_remaining_team_budget)
self.litellm_remaining_api_key_budget_metric.labels(
user_api_key, user_api_key_alias
).set(_remaining_api_key_budget)
### FAILURE INCREMENT ### ### FAILURE INCREMENT ###
if "exception" in kwargs: if "exception" in kwargs:
self.litellm_llm_api_failed_requests_metric.labels( self.litellm_llm_api_failed_requests_metric.labels(
end_user_id, user_api_key, model, user_api_team, user_id end_user_id,
user_api_key,
model,
user_api_team,
user_api_team_alias,
user_id,
).inc() ).inc()
except Exception as e: except Exception as e:
verbose_logger.error( verbose_logger.error(
@ -114,3 +215,15 @@ class PrometheusLogger:
) )
verbose_logger.debug(traceback.format_exc()) verbose_logger.debug(traceback.format_exc())
pass pass
def safe_get_remaining_budget(
max_budget: Optional[float], spend: Optional[float]
) -> float:
if max_budget is None:
return float("inf")
if spend is None:
return max_budget
return max_budget - spend

View file

@ -0,0 +1,65 @@
# +-----------------------------------------------+
# | |
# | Give Feedback / Get Help |
# | https://github.com/BerriAI/litellm/issues/new |
# | |
# +-----------------------------------------------+
#
# Thank you users! We ❤️ you! - Krrish & Ishaan
import copy
from typing import TYPE_CHECKING, Any
import litellm
if TYPE_CHECKING:
    from litellm.utils import Logging as _LiteLLMLoggingObject

    LiteLLMLoggingObject = _LiteLLMLoggingObject
else:
    LiteLLMLoggingObject = Any


def redact_message_input_output_from_logging(
    litellm_logging_obj: LiteLLMLoggingObject, result
):
    """
    Removes messages, prompts, input, response from logging. This modifies the data in-place
    only redacts when litellm.turn_off_message_logging == True
    """
    # check if user opted out of logging message/response to callbacks
    if litellm.turn_off_message_logging is not True:
        return result

    _result = copy.deepcopy(result)

    # remove messages, prompts, input, response from logging
    litellm_logging_obj.model_call_details["messages"] = [
        {"role": "user", "content": "redacted-by-litellm"}
    ]
    litellm_logging_obj.model_call_details["prompt"] = ""
    litellm_logging_obj.model_call_details["input"] = ""

    # response cleaning
    # ChatCompletion Responses
    if (
        litellm_logging_obj.stream is True
        and "complete_streaming_response" in litellm_logging_obj.model_call_details
    ):
        _streaming_response = litellm_logging_obj.model_call_details[
            "complete_streaming_response"
        ]
        for choice in _streaming_response.choices:
            if isinstance(choice, litellm.Choices):
                choice.message.content = "redacted-by-litellm"
            elif isinstance(choice, litellm.utils.StreamingChoices):
                choice.delta.content = "redacted-by-litellm"
    else:
        if _result is not None:
            if isinstance(_result, litellm.ModelResponse):
                if hasattr(_result, "choices") and _result.choices is not None:
                    for choice in _result.choices:
                        if isinstance(choice, litellm.Choices):
                            choice.message.content = "redacted-by-litellm"
                        elif isinstance(choice, litellm.utils.StreamingChoices):
                            choice.delta.content = "redacted-by-litellm"

    return _result

View file

@ -36,6 +36,9 @@ from ..types.llms.openai import (
AsyncAssistantStreamManager, AsyncAssistantStreamManager,
AssistantStreamManager, AssistantStreamManager,
) )
from litellm.caching import DualCache
azure_ad_cache = DualCache()
class AzureOpenAIError(Exception): class AzureOpenAIError(Exception):
@ -309,9 +312,10 @@ def select_azure_base_url_or_endpoint(azure_client_params: dict):
def get_azure_ad_token_from_oidc(azure_ad_token: str): def get_azure_ad_token_from_oidc(azure_ad_token: str):
azure_client_id = os.getenv("AZURE_CLIENT_ID", None) azure_client_id = os.getenv("AZURE_CLIENT_ID", None)
azure_tenant = os.getenv("AZURE_TENANT_ID", None) azure_tenant_id = os.getenv("AZURE_TENANT_ID", None)
azure_authority_host = os.getenv("AZURE_AUTHORITY_HOST", "https://login.microsoftonline.com")
if azure_client_id is None or azure_tenant is None: if azure_client_id is None or azure_tenant_id is None:
raise AzureOpenAIError( raise AzureOpenAIError(
status_code=422, status_code=422,
message="AZURE_CLIENT_ID and AZURE_TENANT_ID must be set", message="AZURE_CLIENT_ID and AZURE_TENANT_ID must be set",
@ -325,8 +329,19 @@ def get_azure_ad_token_from_oidc(azure_ad_token: str):
message="OIDC token could not be retrieved from secret manager.", message="OIDC token could not be retrieved from secret manager.",
) )
azure_ad_token_cache_key = json.dumps({
"azure_client_id": azure_client_id,
"azure_tenant_id": azure_tenant_id,
"azure_authority_host": azure_authority_host,
"oidc_token": oidc_token,
})
azure_ad_token_access_token = azure_ad_cache.get_cache(azure_ad_token_cache_key)
if azure_ad_token_access_token is not None:
return azure_ad_token_access_token
req_token = httpx.post( req_token = httpx.post(
f"https://login.microsoftonline.com/{azure_tenant}/oauth2/v2.0/token", f"{azure_authority_host}/{azure_tenant_id}/oauth2/v2.0/token",
data={ data={
"client_id": azure_client_id, "client_id": azure_client_id,
"grant_type": "client_credentials", "grant_type": "client_credentials",
@ -342,12 +357,23 @@ def get_azure_ad_token_from_oidc(azure_ad_token: str):
message=req_token.text, message=req_token.text,
) )
possible_azure_ad_token = req_token.json().get("access_token", None) azure_ad_token_json = req_token.json()
azure_ad_token_access_token = azure_ad_token_json.get("access_token", None)
azure_ad_token_expires_in = azure_ad_token_json.get("expires_in", None)
if possible_azure_ad_token is None: if azure_ad_token_access_token is None:
raise AzureOpenAIError(status_code=422, message="Azure AD Token not returned") raise AzureOpenAIError(
status_code=422, message="Azure AD Token access_token not returned"
)
return possible_azure_ad_token if azure_ad_token_expires_in is None:
raise AzureOpenAIError(
status_code=422, message="Azure AD Token expires_in not returned"
)
azure_ad_cache.set_cache(key=azure_ad_token_cache_key, value=azure_ad_token_access_token, ttl=azure_ad_token_expires_in)
return azure_ad_token_access_token
class AzureChatCompletion(BaseLLM): class AzureChatCompletion(BaseLLM):

View file

@ -53,7 +53,9 @@ from litellm.types.llms.openai import (
ChatCompletionToolCallFunctionChunk, ChatCompletionToolCallFunctionChunk,
ChatCompletionDeltaChunk, ChatCompletionDeltaChunk,
) )
from litellm.caching import DualCache
iam_cache = DualCache()
class AmazonCohereChatConfig: class AmazonCohereChatConfig:
""" """
@ -325,11 +327,16 @@ class BedrockLLM(BaseLLM):
) = params_to_check ) = params_to_check
### CHECK STS ### ### CHECK STS ###
if ( if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
aws_web_identity_token is not None iam_creds_cache_key = json.dumps({
and aws_role_name is not None "aws_web_identity_token": aws_web_identity_token,
and aws_session_name is not None "aws_role_name": aws_role_name,
): "aws_session_name": aws_session_name,
"aws_region_name": aws_region_name,
})
iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
if iam_creds_dict is None:
oidc_token = get_secret(aws_web_identity_token) oidc_token = get_secret(aws_web_identity_token)
if oidc_token is None: if oidc_token is None:
@ -338,7 +345,11 @@ class BedrockLLM(BaseLLM):
status_code=401, status_code=401,
) )
sts_client = boto3.client("sts") sts_client = boto3.client(
"sts",
region_name=aws_region_name,
endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com"
)
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
@ -349,14 +360,20 @@ class BedrockLLM(BaseLLM):
DurationSeconds=3600, DurationSeconds=3600,
) )
session = boto3.Session( iam_creds_dict = {
aws_access_key_id=sts_response["Credentials"]["AccessKeyId"], "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
aws_secret_access_key=sts_response["Credentials"]["SecretAccessKey"], "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
aws_session_token=sts_response["Credentials"]["SessionToken"], "aws_session_token": sts_response["Credentials"]["SessionToken"],
region_name=aws_region_name, "region_name": aws_region_name,
) }
return session.get_credentials() iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)
session = boto3.Session(**iam_creds_dict)
iam_creds = session.get_credentials()
return iam_creds
elif aws_role_name is not None and aws_session_name is not None: elif aws_role_name is not None and aws_session_name is not None:
sts_client = boto3.client( sts_client = boto3.client(
"sts", "sts",
@ -1416,11 +1433,16 @@ class BedrockConverseLLM(BaseLLM):
) = params_to_check ) = params_to_check
### CHECK STS ### ### CHECK STS ###
if ( if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
aws_web_identity_token is not None iam_creds_cache_key = json.dumps({
and aws_role_name is not None "aws_web_identity_token": aws_web_identity_token,
and aws_session_name is not None "aws_role_name": aws_role_name,
): "aws_session_name": aws_session_name,
"aws_region_name": aws_region_name,
})
iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
if iam_creds_dict is None:
oidc_token = get_secret(aws_web_identity_token) oidc_token = get_secret(aws_web_identity_token)
if oidc_token is None: if oidc_token is None:
@ -1429,7 +1451,11 @@ class BedrockConverseLLM(BaseLLM):
status_code=401, status_code=401,
) )
sts_client = boto3.client("sts") sts_client = boto3.client(
"sts",
region_name=aws_region_name,
endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com"
)
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
@ -1440,14 +1466,20 @@ class BedrockConverseLLM(BaseLLM):
DurationSeconds=3600, DurationSeconds=3600,
) )
session = boto3.Session( iam_creds_dict = {
aws_access_key_id=sts_response["Credentials"]["AccessKeyId"], "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
aws_secret_access_key=sts_response["Credentials"]["SecretAccessKey"], "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
aws_session_token=sts_response["Credentials"]["SessionToken"], "aws_session_token": sts_response["Credentials"]["SessionToken"],
region_name=aws_region_name, "region_name": aws_region_name,
) }
return session.get_credentials() iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)
session = boto3.Session(**iam_creds_dict)
iam_creds = session.get_credentials()
return iam_creds
elif aws_role_name is not None and aws_session_name is not None: elif aws_role_name is not None and aws_session_name is not None:
sts_client = boto3.client( sts_client = boto3.client(
"sts", "sts",

View file

@ -139,6 +139,7 @@ def process_response(
def convert_model_to_url(model: str, api_base: str): def convert_model_to_url(model: str, api_base: str):
user_id, app_id, model_id = model.split(".") user_id, app_id, model_id = model.split(".")
model_id = model_id.lower()
return f"{api_base}/users/{user_id}/apps/{app_id}/models/{model_id}/outputs" return f"{api_base}/users/{user_id}/apps/{app_id}/models/{model_id}/outputs"
@ -171,19 +172,55 @@ async def async_completion(
async_handler = AsyncHTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0)) async_handler = AsyncHTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
response = await async_handler.post( response = await async_handler.post(
api_base, headers=headers, data=json.dumps(data) url=model, headers=headers, data=json.dumps(data)
) )
return process_response( logging_obj.post_call(
model=model, input=prompt,
prompt=prompt,
response=response,
model_response=model_response,
api_key=api_key, api_key=api_key,
data=data, original_response=response.text,
encoding=encoding, additional_args={"complete_input_dict": data},
logging_obj=logging_obj,
) )
## RESPONSE OBJECT
try:
completion_response = response.json()
except Exception:
raise ClarifaiError(
message=response.text, status_code=response.status_code, url=model
)
# print(completion_response)
try:
choices_list = []
for idx, item in enumerate(completion_response["outputs"]):
if len(item["data"]["text"]["raw"]) > 0:
message_obj = Message(content=item["data"]["text"]["raw"])
else:
message_obj = Message(content=None)
choice_obj = Choices(
finish_reason="stop",
index=idx + 1, # check
message=message_obj,
)
choices_list.append(choice_obj)
model_response["choices"] = choices_list
except Exception as e:
raise ClarifaiError(
message=traceback.format_exc(), status_code=response.status_code, url=model
)
# Calculate Usage
prompt_tokens = len(encoding.encode(prompt))
completion_tokens = len(
encoding.encode(model_response["choices"][0]["message"].get("content"))
)
model_response["model"] = model
model_response["usage"] = Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
)
return model_response
def completion( def completion(
@ -241,7 +278,7 @@ def completion(
additional_args={ additional_args={
"complete_input_dict": data, "complete_input_dict": data,
"headers": headers, "headers": headers,
"api_base": api_base, "api_base": model,
}, },
) )
if acompletion == True: if acompletion == True:

View file

@ -12,6 +12,15 @@ class AsyncHTTPHandler:
timeout: Optional[Union[float, httpx.Timeout]] = None, timeout: Optional[Union[float, httpx.Timeout]] = None,
concurrent_limit=1000, concurrent_limit=1000,
): ):
self.timeout = timeout
self.client = self.create_client(
timeout=timeout, concurrent_limit=concurrent_limit
)
def create_client(
self, timeout: Optional[Union[float, httpx.Timeout]], concurrent_limit: int
) -> httpx.AsyncClient:
async_proxy_mounts = None async_proxy_mounts = None
# Check if the HTTP_PROXY and HTTPS_PROXY environment variables are set and use them accordingly. # Check if the HTTP_PROXY and HTTPS_PROXY environment variables are set and use them accordingly.
http_proxy = os.getenv("HTTP_PROXY", None) http_proxy = os.getenv("HTTP_PROXY", None)
@ -39,7 +48,8 @@ class AsyncHTTPHandler:
if timeout is None: if timeout is None:
timeout = _DEFAULT_TIMEOUT timeout = _DEFAULT_TIMEOUT
# Create a client with a connection pool # Create a client with a connection pool
self.client = httpx.AsyncClient(
return httpx.AsyncClient(
timeout=timeout, timeout=timeout,
limits=httpx.Limits( limits=httpx.Limits(
max_connections=concurrent_limit, max_connections=concurrent_limit,
@ -83,11 +93,48 @@ class AsyncHTTPHandler:
response = await self.client.send(req, stream=stream) response = await self.client.send(req, stream=stream)
response.raise_for_status() response.raise_for_status()
return response return response
except httpx.RemoteProtocolError:
# Retry the request with a new session if there is a connection error
new_client = self.create_client(timeout=self.timeout, concurrent_limit=1)
try:
return await self.single_connection_post_request(
url=url,
client=new_client,
data=data,
json=json,
params=params,
headers=headers,
stream=stream,
)
finally:
await new_client.aclose()
except httpx.HTTPStatusError as e: except httpx.HTTPStatusError as e:
raise e raise e
except Exception as e: except Exception as e:
raise e raise e
async def single_connection_post_request(
self,
url: str,
client: httpx.AsyncClient,
data: Optional[Union[dict, str]] = None, # type: ignore
json: Optional[dict] = None,
params: Optional[dict] = None,
headers: Optional[dict] = None,
stream: bool = False,
):
"""
Making POST request for a single connection client.
Used for retrying connection client errors.
"""
req = client.build_request(
"POST", url, data=data, json=json, params=params, headers=headers # type: ignore
)
response = await client.send(req, stream=stream)
response.raise_for_status()
return response
def __del__(self) -> None: def __del__(self) -> None:
try: try:
asyncio.get_running_loop().create_task(self.close()) asyncio.get_running_loop().create_task(self.close())

View file

@ -28,6 +28,7 @@ from .prompt_templates.factory import prompt_factory, custom_prompt
from openai import OpenAI, AsyncOpenAI from openai import OpenAI, AsyncOpenAI
from ..types.llms.openai import * from ..types.llms.openai import *
import openai import openai
from litellm.types.utils import ProviderField
class OpenAIError(Exception): class OpenAIError(Exception):
@ -164,6 +165,68 @@ class MistralConfig:
return optional_params return optional_params
class MistralEmbeddingConfig:
"""
Reference: https://docs.mistral.ai/api/#operation/createEmbedding
"""
def __init__(
self,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self):
return [
"encoding_format",
]
def map_openai_params(self, non_default_params: dict, optional_params: dict):
for param, value in non_default_params.items():
if param == "encoding_format":
optional_params["encoding_format"] = value
return optional_params
class AzureAIStudioConfig:
def get_required_params(self) -> List[ProviderField]:
"""For a given provider, return it's required fields with a description"""
return [
ProviderField(
field_name="api_key",
field_type="string",
field_description="Your Azure AI Studio API Key.",
field_value="zEJ...",
),
ProviderField(
field_name="api_base",
field_type="string",
field_description="Your Azure AI Studio API Base.",
field_value="https://Mistral-serverless.",
),
]
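`MistralEmbeddingConfig` above maps the OpenAI-style `encoding_format` argument onto the Mistral embedding request, while `AzureAIStudioConfig` only declares which credentials the UI should prompt for. A small usage sketch of the mapping step; the import path mirrors the file being diffed (`litellm/llms/openai.py`) and is an assumption rather than a documented public API:

```python
# Usage sketch; the import path is taken from this diff and may change.
from litellm.llms.openai import MistralEmbeddingConfig

config = MistralEmbeddingConfig()
print(config.get_supported_openai_params())  # ['encoding_format']

optional_params: dict = {}
config.map_openai_params(
    non_default_params={"encoding_format": "float"},  # OpenAI-style kwarg
    optional_params=optional_params,                  # provider params accumulate here
)
print(optional_params)  # {'encoding_format': 'float'}
```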
class DeepInfraConfig: class DeepInfraConfig:
""" """
Reference: https://deepinfra.com/docs/advanced/openai_api Reference: https://deepinfra.com/docs/advanced/openai_api

View file

@ -4,6 +4,7 @@ from enum import Enum
import requests # type: ignore import requests # type: ignore
import time import time
from typing import Callable, Optional, Union, List, Literal, Any from typing import Callable, Optional, Union, List, Literal, Any
from pydantic import BaseModel
from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
import litellm, uuid import litellm, uuid
import httpx, inspect # type: ignore import httpx, inspect # type: ignore
@ -12,7 +13,12 @@ from litellm.llms.prompt_templates.factory import (
convert_to_gemini_tool_call_result, convert_to_gemini_tool_call_result,
convert_to_gemini_tool_call_invoke, convert_to_gemini_tool_call_invoke,
) )
from litellm.types.files import get_file_mime_type_for_file_type, get_file_type_from_extension, is_gemini_1_5_accepted_file_type, is_video_file_type from litellm.types.files import (
get_file_mime_type_for_file_type,
get_file_type_from_extension,
is_gemini_1_5_accepted_file_type,
is_video_file_type,
)
class VertexAIError(Exception): class VertexAIError(Exception):
@ -611,7 +617,7 @@ def completion(
llm_model = None llm_model = None
# NOTE: async prediction and streaming under "private" mode isn't supported by aiplatform right now # NOTE: async prediction and streaming under "private" mode isn't supported by aiplatform right now
if acompletion == True: if acompletion is True:
data = { data = {
"llm_model": llm_model, "llm_model": llm_model,
"mode": mode, "mode": mode,
@ -643,7 +649,7 @@ def completion(
tools = optional_params.pop("tools", None) tools = optional_params.pop("tools", None)
content = _gemini_convert_messages_with_history(messages=messages) content = _gemini_convert_messages_with_history(messages=messages)
stream = optional_params.pop("stream", False) stream = optional_params.pop("stream", False)
if stream == True: if stream is True:
request_str += f"response = llm_model.generate_content({content}, generation_config=GenerationConfig(**{optional_params}), safety_settings={safety_settings}, stream={stream})\n" request_str += f"response = llm_model.generate_content({content}, generation_config=GenerationConfig(**{optional_params}), safety_settings={safety_settings}, stream={stream})\n"
logging_obj.pre_call( logging_obj.pre_call(
input=prompt, input=prompt,
@ -1293,6 +1299,95 @@ async def async_streaming(
return streamwrapper return streamwrapper
class VertexAITextEmbeddingConfig(BaseModel):
"""
Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#TextEmbeddingInput
Args:
auto_truncate: Optional(bool) If True, will truncate input text to fit within the model's max input length.
task_type: Optional(str) The type of task to be performed. The default is "RETRIEVAL_QUERY".
title: Optional(str) The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
"""
auto_truncate: Optional[bool] = None
task_type: Optional[
Literal[
"RETRIEVAL_QUERY",
"RETRIEVAL_DOCUMENT",
"SEMANTIC_SIMILARITY",
"CLASSIFICATION",
"CLUSTERING",
"QUESTION_ANSWERING",
"FACT_VERIFICATION",
]
] = None
title: Optional[str] = None
def __init__(
self,
auto_truncate: Optional[bool] = None,
task_type: Optional[
Literal[
"RETRIEVAL_QUERY",
"RETRIEVAL_DOCUMENT",
"SEMANTIC_SIMILARITY",
"CLASSIFICATION",
"CLUSTERING",
"QUESTION_ANSWERING",
"FACT_VERIFICATION",
]
] = None,
title: Optional[str] = None,
) -> None:
locals_ = locals()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self):
return [
"dimensions",
]
def map_openai_params(self, non_default_params: dict, optional_params: dict):
for param, value in non_default_params.items():
if param == "dimensions":
optional_params["output_dimensionality"] = value
return optional_params
def get_mapped_special_auth_params(self) -> dict:
"""
Common auth params across bedrock/vertex_ai/azure/watsonx
"""
return {"project": "vertex_project", "region_name": "vertex_location"}
def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
mapped_params = self.get_mapped_special_auth_params()
for param, value in non_default_params.items():
if param in mapped_params:
optional_params[mapped_params[param]] = value
return optional_params
def embedding( def embedding(
model: str, model: str,
input: Union[list, str], input: Union[list, str],
@ -1316,7 +1411,7 @@ def embedding(
message="vertexai import failed please run `pip install google-cloud-aiplatform`", message="vertexai import failed please run `pip install google-cloud-aiplatform`",
) )
from vertexai.language_models import TextEmbeddingModel from vertexai.language_models import TextEmbeddingModel, TextEmbeddingInput
import google.auth # type: ignore import google.auth # type: ignore
## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744 ## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
@ -1347,6 +1442,16 @@ def embedding(
if isinstance(input, str): if isinstance(input, str):
input = [input] input = [input]
if optional_params is not None and isinstance(optional_params, dict):
if optional_params.get("task_type") or optional_params.get("title"):
# if user passed task_type or title, cast to TextEmbeddingInput
_task_type = optional_params.pop("task_type", None)
_title = optional_params.pop("title", None)
input = [
TextEmbeddingInput(text=x, task_type=_task_type, title=_title)
for x in input
]
try: try:
llm_model = TextEmbeddingModel.from_pretrained(model) llm_model = TextEmbeddingModel.from_pretrained(model)
except Exception as e: except Exception as e:
@ -1363,7 +1468,8 @@ def embedding(
encoding=encoding, encoding=encoding,
) )
request_str = f"""embeddings = llm_model.get_embeddings({input})""" _input_dict = {"texts": input, **optional_params}
request_str = f"""embeddings = llm_model.get_embeddings({_input_dict})"""
## LOGGING PRE-CALL ## LOGGING PRE-CALL
logging_obj.pre_call( logging_obj.pre_call(
input=input, input=input,
@ -1375,7 +1481,7 @@ def embedding(
) )
try: try:
embeddings = llm_model.get_embeddings(input) embeddings = llm_model.get_embeddings(**_input_dict)
except Exception as e: except Exception as e:
raise VertexAIError(status_code=500, message=str(e)) raise VertexAIError(status_code=500, message=str(e))
@ -1383,6 +1489,7 @@ def embedding(
logging_obj.post_call(input=input, api_key=None, original_response=embeddings) logging_obj.post_call(input=input, api_key=None, original_response=embeddings)
## Populate OpenAI compliant dictionary ## Populate OpenAI compliant dictionary
embedding_response = [] embedding_response = []
input_tokens: int = 0
for idx, embedding in enumerate(embeddings): for idx, embedding in enumerate(embeddings):
embedding_response.append( embedding_response.append(
{ {
@ -1391,14 +1498,10 @@ def embedding(
"embedding": embedding.values, "embedding": embedding.values,
} }
) )
input_tokens += embedding.statistics.token_count
model_response["object"] = "list" model_response["object"] = "list"
model_response["data"] = embedding_response model_response["data"] = embedding_response
model_response["model"] = model model_response["model"] = model
input_tokens = 0
input_str = "".join(input)
input_tokens += len(encoding.encode(input_str))
usage = Usage( usage = Usage(
prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
@ -1420,7 +1523,8 @@ async def async_embedding(
""" """
Async embedding implementation Async embedding implementation
""" """
request_str = f"""embeddings = llm_model.get_embeddings({input})""" _input_dict = {"texts": input, **optional_params}
request_str = f"""embeddings = llm_model.get_embeddings({_input_dict})"""
## LOGGING PRE-CALL ## LOGGING PRE-CALL
logging_obj.pre_call( logging_obj.pre_call(
input=input, input=input,
@ -1432,7 +1536,7 @@ async def async_embedding(
) )
try: try:
embeddings = await client.get_embeddings_async(input) embeddings = await client.get_embeddings_async(**_input_dict)
except Exception as e: except Exception as e:
raise VertexAIError(status_code=500, message=str(e)) raise VertexAIError(status_code=500, message=str(e))
@ -1440,6 +1544,7 @@ async def async_embedding(
logging_obj.post_call(input=input, api_key=None, original_response=embeddings) logging_obj.post_call(input=input, api_key=None, original_response=embeddings)
## Populate OpenAI compliant dictionary ## Populate OpenAI compliant dictionary
embedding_response = [] embedding_response = []
input_tokens: int = 0
for idx, embedding in enumerate(embeddings): for idx, embedding in enumerate(embeddings):
embedding_response.append( embedding_response.append(
{ {
@ -1448,18 +1553,13 @@ async def async_embedding(
"embedding": embedding.values, "embedding": embedding.values,
} }
) )
input_tokens += embedding.statistics.token_count
model_response["object"] = "list" model_response["object"] = "list"
model_response["data"] = embedding_response model_response["data"] = embedding_response
model_response["model"] = model model_response["model"] = model
input_tokens = 0
input_str = "".join(input)
input_tokens += len(encoding.encode(input_str))
usage = Usage( usage = Usage(
prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
) )
model_response.usage = usage model_response.usage = usage
return model_response return model_response
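The embedding changes above do three things: map the OpenAI `dimensions` parameter onto Vertex's `output_dimensionality`, wrap inputs in `TextEmbeddingInput` when `task_type`/`title` are passed, and read token usage from `embedding.statistics.token_count` instead of re-tokenizing the input string. A sketch of the SDK-level calls involved, assuming `google-cloud-aiplatform` is installed and application-default credentials are configured; the model name and task type are examples:

```python
# Sketch of the vertexai SDK calls this diff builds on; model name and task_type
# are examples, and application-default credentials are assumed.
from vertexai.language_models import TextEmbeddingModel, TextEmbeddingInput

texts = ["litellm supports vertex ai embeddings"]
inputs = [TextEmbeddingInput(text=t, task_type="RETRIEVAL_QUERY") for t in texts]

model = TextEmbeddingModel.from_pretrained("textembedding-gecko@003")
embeddings = model.get_embeddings(inputs)

# Usage now comes from the SDK's statistics rather than re-encoding the input.
input_tokens = sum(e.statistics.token_count for e in embeddings)
print(input_tokens, len(embeddings[0].values))
```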

View file

@ -1,3 +1,7 @@
# What is this?
## httpx client for vertex ai calls
## Initial implementation - covers gemini + image gen calls
from functools import partial
import os, types import os, types
import json import json
from enum import Enum from enum import Enum
@ -9,6 +13,284 @@ import litellm, uuid
import httpx, inspect # type: ignore import httpx, inspect # type: ignore
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from .base import BaseLLM from .base import BaseLLM
from litellm.types.llms.vertex_ai import (
ContentType,
SystemInstructions,
PartType,
RequestBody,
GenerateContentResponseBody,
FunctionCallingConfig,
FunctionDeclaration,
Tools,
ToolConfig,
GenerationConfig,
)
from litellm.llms.vertex_ai import _gemini_convert_messages_with_history
from litellm.types.utils import GenericStreamingChunk
from litellm.types.llms.openai import (
ChatCompletionUsageBlock,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionResponseMessage,
)
class VertexGeminiConfig:
"""
Reference: https://cloud.google.com/vertex-ai/docs/generative-ai/chat/test-chat-prompts
Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference
The class `VertexGeminiConfig` provides configuration for VertexAI's Gemini API interface. Below are the parameters:
- `temperature` (float): This controls the degree of randomness in token selection.
- `max_output_tokens` (integer): This sets the limit on the maximum number of tokens in the text output. In this case, the default value is 256.
- `top_p` (float): The tokens are selected from the most probable to the least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95.
- `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40.
- `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'.
- `candidate_count` (int): Number of generated responses to return.
- `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. The stop sequence will not be included as part of the response.
- `frequency_penalty` (float): This parameter penalizes the model for repeating the same output. The default value is 0.0.
- `presence_penalty` (float): This parameter penalizes the model for generating the same output as the input. The default value is 0.0.
Note: Please make sure to modify the default parameters as required for your use case.
"""
temperature: Optional[float] = None
max_output_tokens: Optional[int] = None
top_p: Optional[float] = None
top_k: Optional[int] = None
response_mime_type: Optional[str] = None
candidate_count: Optional[int] = None
stop_sequences: Optional[list] = None
frequency_penalty: Optional[float] = None
presence_penalty: Optional[float] = None
def __init__(
self,
temperature: Optional[float] = None,
max_output_tokens: Optional[int] = None,
top_p: Optional[float] = None,
top_k: Optional[int] = None,
response_mime_type: Optional[str] = None,
candidate_count: Optional[int] = None,
stop_sequences: Optional[list] = None,
frequency_penalty: Optional[float] = None,
presence_penalty: Optional[float] = None,
) -> None:
locals_ = locals()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self):
return [
"temperature",
"top_p",
"max_tokens",
"stream",
"tools",
"tool_choice",
"response_format",
"n",
"stop",
]
def map_tool_choice_values(
self, model: str, tool_choice: Union[str, dict]
) -> Optional[ToolConfig]:
if tool_choice == "none":
return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="NONE"))
elif tool_choice == "required":
return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="ANY"))
elif tool_choice == "auto":
return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="AUTO"))
elif isinstance(tool_choice, dict):
# only supported for anthropic + mistral models - https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html
name = tool_choice.get("function", {}).get("name", "")
return ToolConfig(
functionCallingConfig=FunctionCallingConfig(
mode="ANY", allowed_function_names=[name]
)
)
else:
raise litellm.utils.UnsupportedParamsError(
message="VertexAI doesn't support tool_choice={}. Supported tool_choice values=['auto', 'required', json object]. To drop it from the call, set `litellm.drop_params = True.".format(
tool_choice
),
status_code=400,
)
def map_openai_params(
self,
model: str,
non_default_params: dict,
optional_params: dict,
):
for param, value in non_default_params.items():
if param == "temperature":
optional_params["temperature"] = value
if param == "top_p":
optional_params["top_p"] = value
if (
param == "stream" and value is True
):  # sending stream=False can cause it to get passed unchecked and raise issues
optional_params["stream"] = value
if param == "n":
optional_params["candidate_count"] = value
if param == "stop":
if isinstance(value, str):
optional_params["stop_sequences"] = [value]
elif isinstance(value, list):
optional_params["stop_sequences"] = value
if param == "max_tokens":
optional_params["max_output_tokens"] = value
if param == "response_format" and value["type"] == "json_object": # type: ignore
optional_params["response_mime_type"] = "application/json"
if param == "frequency_penalty":
optional_params["frequency_penalty"] = value
if param == "presence_penalty":
optional_params["presence_penalty"] = value
if param == "tools" and isinstance(value, list):
gtool_func_declarations = []
for tool in value:
gtool_func_declaration = FunctionDeclaration(
name=tool["function"]["name"],
description=tool["function"].get("description", ""),
parameters=tool["function"].get("parameters", {}),
)
gtool_func_declarations.append(gtool_func_declaration)
optional_params["tools"] = [
Tools(function_declarations=gtool_func_declarations)
]
if param == "tool_choice" and (
isinstance(value, str) or isinstance(value, dict)
):
_tool_choice_value = self.map_tool_choice_values(
model=model, tool_choice=value # type: ignore
)
if _tool_choice_value is not None:
optional_params["tool_choice"] = _tool_choice_value
return optional_params
def get_mapped_special_auth_params(self) -> dict:
"""
Common auth params across bedrock/vertex_ai/azure/watsonx
"""
return {"project": "vertex_project", "region_name": "vertex_location"}
def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
mapped_params = self.get_mapped_special_auth_params()
for param, value in non_default_params.items():
if param in mapped_params:
optional_params[mapped_params[param]] = value
return optional_params
def get_eu_regions(self) -> List[str]:
"""
Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations#available-regions
"""
return [
"europe-central2",
"europe-north1",
"europe-southwest1",
"europe-west1",
"europe-west2",
"europe-west3",
"europe-west4",
"europe-west6",
"europe-west8",
"europe-west9",
]
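`VertexGeminiConfig` above translates OpenAI-style request parameters into Gemini generation-config fields: `max_tokens` → `max_output_tokens`, `n` → `candidate_count`, `stop` → `stop_sequences`, a `json_object` response format → `response_mime_type`, and tools/tool_choice into Gemini function declarations. A usage sketch of that mapping; the import path mirrors this file (`litellm/llms/vertex_httpx.py`) and is an assumption:

```python
# Usage sketch for the mapping above; the import path is taken from this diff.
from litellm.llms.vertex_httpx import VertexGeminiConfig

config = VertexGeminiConfig()
optional_params: dict = {}
config.map_openai_params(
    model="gemini-1.5-pro",
    non_default_params={
        "temperature": 0.2,
        "max_tokens": 256,                            # -> max_output_tokens
        "n": 2,                                       # -> candidate_count
        "stop": ["\n\n"],                             # -> stop_sequences
        "response_format": {"type": "json_object"},   # -> response_mime_type
    },
    optional_params=optional_params,
)
print(optional_params)
# {'temperature': 0.2, 'max_output_tokens': 256, 'candidate_count': 2,
#  'stop_sequences': ['\n\n'], 'response_mime_type': 'application/json'}
```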
async def make_call(
client: Optional[AsyncHTTPHandler],
api_base: str,
headers: dict,
data: str,
model: str,
messages: list,
logging_obj,
):
if client is None:
client = AsyncHTTPHandler() # Create a new client if none provided
response = await client.post(api_base, headers=headers, data=data, stream=True)
if response.status_code != 200:
raise VertexAIError(status_code=response.status_code, message=response.text)
completion_stream = ModelResponseIterator(
streaming_response=response.aiter_bytes(chunk_size=2056)
)
# LOGGING
logging_obj.post_call(
input=messages,
api_key="",
original_response="first stream response received",
additional_args={"complete_input_dict": data},
)
return completion_stream
def make_sync_call(
client: Optional[HTTPHandler],
api_base: str,
headers: dict,
data: str,
model: str,
messages: list,
logging_obj,
):
if client is None:
client = HTTPHandler() # Create a new client if none provided
response = client.post(api_base, headers=headers, data=data, stream=True)
if response.status_code != 200:
raise VertexAIError(status_code=response.status_code, message=response.read())
completion_stream = ModelResponseIterator(
streaming_response=response.iter_bytes(chunk_size=2056)
)
# LOGGING
logging_obj.post_call(
input=messages,
api_key="",
original_response="first stream response received",
additional_args={"complete_input_dict": data},
)
return completion_stream
class VertexAIError(Exception): class VertexAIError(Exception):
@ -33,16 +315,125 @@ class VertexLLM(BaseLLM):
self.project_id: Optional[str] = None self.project_id: Optional[str] = None
self.async_handler: Optional[AsyncHTTPHandler] = None self.async_handler: Optional[AsyncHTTPHandler] = None
def load_auth(self) -> Tuple[Any, str]: def _process_response(
self,
model: str,
response: httpx.Response,
model_response: ModelResponse,
logging_obj: litellm.utils.Logging,
optional_params: dict,
api_key: str,
data: Union[dict, str],
messages: List,
print_verbose,
encoding,
) -> ModelResponse:
## LOGGING
logging_obj.post_call(
input=messages,
api_key="",
original_response=response.text,
additional_args={"complete_input_dict": data},
)
print_verbose(f"raw model_response: {response.text}")
## RESPONSE OBJECT
try:
completion_response = GenerateContentResponseBody(**response.json()) # type: ignore
except Exception as e:
raise VertexAIError(
message="Received={}, Error converting to valid response block={}. File an issue if litellm error - https://github.com/BerriAI/litellm/issues".format(
response.text, str(e)
),
status_code=422,
)
model_response.choices = [] # type: ignore
## GET MODEL ##
model_response.model = model
## GET TEXT ##
chat_completion_message: ChatCompletionResponseMessage = {"role": "assistant"}
content_str = ""
tools: List[ChatCompletionToolCallChunk] = []
for idx, candidate in enumerate(completion_response["candidates"]):
if "content" not in candidate:
continue
if "text" in candidate["content"]["parts"][0]:
content_str = candidate["content"]["parts"][0]["text"]
if "functionCall" in candidate["content"]["parts"][0]:
_function_chunk = ChatCompletionToolCallFunctionChunk(
name=candidate["content"]["parts"][0]["functionCall"]["name"],
arguments=json.dumps(
candidate["content"]["parts"][0]["functionCall"]["args"]
),
)
_tool_response_chunk = ChatCompletionToolCallChunk(
id=f"call_{str(uuid.uuid4())}",
type="function",
function=_function_chunk,
)
tools.append(_tool_response_chunk)
chat_completion_message["content"] = content_str
chat_completion_message["tool_calls"] = tools
choice = litellm.Choices(
finish_reason=candidate.get("finishReason", "stop"),
index=candidate.get("index", idx),
message=chat_completion_message, # type: ignore
logprobs=None,
enhancements=None,
)
model_response.choices.append(choice)
## GET USAGE ##
usage = litellm.Usage(
prompt_tokens=completion_response["usageMetadata"]["promptTokenCount"],
completion_tokens=completion_response["usageMetadata"][
"candidatesTokenCount"
],
total_tokens=completion_response["usageMetadata"]["totalTokenCount"],
)
setattr(model_response, "usage", usage)
return model_response
def get_vertex_region(self, vertex_region: Optional[str]) -> str:
return vertex_region or "us-central1"
def load_auth(
self, credentials: Optional[str], project_id: Optional[str]
) -> Tuple[Any, str]:
from google.auth.transport.requests import Request # type: ignore[import-untyped] from google.auth.transport.requests import Request # type: ignore[import-untyped]
from google.auth.credentials import Credentials # type: ignore[import-untyped] from google.auth.credentials import Credentials # type: ignore[import-untyped]
import google.auth as google_auth import google.auth as google_auth
credentials, project_id = google_auth.default( if credentials is not None and isinstance(credentials, str):
import google.oauth2.service_account
json_obj = json.loads(credentials)
creds = google.oauth2.service_account.Credentials.from_service_account_info(
json_obj,
scopes=["https://www.googleapis.com/auth/cloud-platform"], scopes=["https://www.googleapis.com/auth/cloud-platform"],
) )
credentials.refresh(Request()) if project_id is None:
project_id = creds.project_id
else:
creds, project_id = google_auth.default(
quota_project_id=project_id,
scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
creds.refresh(Request())
if not project_id: if not project_id:
raise ValueError("Could not resolve project_id") raise ValueError("Could not resolve project_id")
@ -52,38 +443,272 @@ class VertexLLM(BaseLLM):
f"Expected project_id to be a str but got {type(project_id)}" f"Expected project_id to be a str but got {type(project_id)}"
) )
return credentials, project_id return creds, project_id
def refresh_auth(self, credentials: Any) -> None: def refresh_auth(self, credentials: Any) -> None:
from google.auth.transport.requests import Request # type: ignore[import-untyped] from google.auth.transport.requests import Request # type: ignore[import-untyped]
credentials.refresh(Request()) credentials.refresh(Request())
def _prepare_request(self, request: httpx.Request) -> None: def _ensure_access_token(
access_token = self._ensure_access_token() self, credentials: Optional[str], project_id: Optional[str]
) -> Tuple[str, str]:
if request.headers.get("Authorization"): """
# already authenticated, nothing for us to do Returns auth token and project id
return """
if self.access_token is not None and self.project_id is not None:
request.headers["Authorization"] = f"Bearer {access_token}" return self.access_token, self.project_id
def _ensure_access_token(self) -> str:
if self.access_token is not None:
return self.access_token
if not self._credentials: if not self._credentials:
self._credentials, project_id = self.load_auth() self._credentials, project_id = self.load_auth(
credentials=credentials, project_id=project_id
)
if not self.project_id: if not self.project_id:
self.project_id = project_id self.project_id = project_id
else: else:
self.refresh_auth(self._credentials) self.refresh_auth(self._credentials)
if not self._credentials.token: if not self.project_id:
self.project_id = self._credentials.project_id
if not self.project_id:
raise ValueError("Could not resolve project_id")
if not self._credentials or not self._credentials.token:
raise RuntimeError("Could not resolve API token from the environment") raise RuntimeError("Could not resolve API token from the environment")
assert isinstance(self._credentials.token, str) return self._credentials.token, self.project_id
return self._credentials.token
async def async_streaming(
self,
model: str,
messages: list,
api_base: str,
model_response: ModelResponse,
print_verbose: Callable,
data: str,
timeout: Optional[Union[float, httpx.Timeout]],
encoding,
logging_obj,
stream,
optional_params: dict,
litellm_params=None,
logger_fn=None,
headers={},
client: Optional[AsyncHTTPHandler] = None,
) -> CustomStreamWrapper:
streaming_response = CustomStreamWrapper(
completion_stream=None,
make_call=partial(
make_call,
client=client,
api_base=api_base,
headers=headers,
data=data,
model=model,
messages=messages,
logging_obj=logging_obj,
),
model=model,
custom_llm_provider="vertex_ai_beta",
logging_obj=logging_obj,
)
return streaming_response
async def async_completion(
self,
model: str,
messages: list,
api_base: str,
model_response: ModelResponse,
print_verbose: Callable,
data: str,
timeout: Optional[Union[float, httpx.Timeout]],
encoding,
logging_obj,
stream,
optional_params: dict,
litellm_params=None,
logger_fn=None,
headers={},
client: Optional[AsyncHTTPHandler] = None,
) -> Union[ModelResponse, CustomStreamWrapper]:
if client is None:
_params = {}
if timeout is not None:
if isinstance(timeout, float) or isinstance(timeout, int):
timeout = httpx.Timeout(timeout)
_params["timeout"] = timeout
client = AsyncHTTPHandler(**_params) # type: ignore
else:
client = client # type: ignore
try:
response = await client.post(api_base, headers=headers, json=data) # type: ignore
response.raise_for_status()
except httpx.HTTPStatusError as err:
error_code = err.response.status_code
raise VertexAIError(status_code=error_code, message=err.response.text)
except httpx.TimeoutException:
raise VertexAIError(status_code=408, message="Timeout error occurred.")
return self._process_response(
model=model,
response=response,
model_response=model_response,
logging_obj=logging_obj,
api_key="",
data=data,
messages=messages,
print_verbose=print_verbose,
optional_params=optional_params,
encoding=encoding,
)
def completion(
self,
model: str,
messages: list,
model_response: ModelResponse,
print_verbose: Callable,
encoding,
logging_obj,
optional_params: dict,
acompletion: bool,
timeout: Optional[Union[float, httpx.Timeout]],
vertex_project: Optional[str],
vertex_location: Optional[str],
vertex_credentials: Optional[str],
litellm_params=None,
logger_fn=None,
extra_headers: Optional[dict] = None,
client: Optional[Union[AsyncHTTPHandler, HTTPHandler]] = None,
) -> Union[ModelResponse, CustomStreamWrapper]:
auth_header, vertex_project = self._ensure_access_token(
credentials=vertex_credentials, project_id=vertex_project
)
vertex_location = self.get_vertex_region(vertex_region=vertex_location)
stream: Optional[bool] = optional_params.pop("stream", None) # type: ignore
### SET RUNTIME ENDPOINT ###
url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:generateContent"
## TRANSFORMATION ##
# Separate system prompt from rest of message
system_prompt_indices = []
system_content_blocks: List[PartType] = []
for idx, message in enumerate(messages):
if message["role"] == "system":
_system_content_block = PartType(text=message["content"])
system_content_blocks.append(_system_content_block)
system_prompt_indices.append(idx)
if len(system_prompt_indices) > 0:
for idx in reversed(system_prompt_indices):
messages.pop(idx)
system_instructions = SystemInstructions(parts=system_content_blocks)
content = _gemini_convert_messages_with_history(messages=messages)
tools: Optional[Tools] = optional_params.pop("tools", None)
tool_choice: Optional[ToolConfig] = optional_params.pop("tool_choice", None)
generation_config: Optional[GenerationConfig] = GenerationConfig(
**optional_params
)
data = RequestBody(system_instruction=system_instructions, contents=content)
if tools is not None:
data["tools"] = tools
if tool_choice is not None:
data["toolConfig"] = tool_choice
if generation_config is not None:
data["generationConfig"] = generation_config
headers = {
"Content-Type": "application/json; charset=utf-8",
"Authorization": f"Bearer {auth_header}",
}
## LOGGING
logging_obj.pre_call(
input=messages,
api_key="",
additional_args={
"complete_input_dict": data,
"api_base": url,
"headers": headers,
},
)
### ROUTING (ASYNC, STREAMING, SYNC)
if acompletion:
### ASYNC COMPLETION
return self.async_completion(
model=model,
messages=messages,
data=data, # type: ignore
api_base=url,
model_response=model_response,
print_verbose=print_verbose,
encoding=encoding,
logging_obj=logging_obj,
optional_params=optional_params,
stream=stream,
litellm_params=litellm_params,
logger_fn=logger_fn,
headers=headers,
timeout=timeout,
client=client, # type: ignore
)
## SYNC STREAMING CALL ##
if stream is not None and stream is True:
streaming_response = CustomStreamWrapper(
completion_stream=None,
make_call=partial(
make_sync_call,
client=None,
api_base=url,
headers=headers, # type: ignore
data=json.dumps(data),
model=model,
messages=messages,
logging_obj=logging_obj,
),
model=model,
custom_llm_provider="vertex_ai_beta",
logging_obj=logging_obj,
)
return streaming_response
## COMPLETION CALL ##
if client is None or isinstance(client, AsyncHTTPHandler):
_params = {}
if timeout is not None:
if isinstance(timeout, float) or isinstance(timeout, int):
timeout = httpx.Timeout(timeout)
_params["timeout"] = timeout
client = HTTPHandler(**_params) # type: ignore
else:
client = client
try:
response = client.post(url=url, headers=headers, json=data) # type: ignore
response.raise_for_status()
except httpx.HTTPStatusError as err:
error_code = err.response.status_code
raise VertexAIError(status_code=error_code, message=response.text)
except httpx.TimeoutException:
raise VertexAIError(status_code=408, message="Timeout error occurred.")
return self._process_response(
model=model,
response=response,
model_response=model_response,
logging_obj=logging_obj,
optional_params=optional_params,
api_key="",
data=data, # type: ignore
messages=messages,
print_verbose=print_verbose,
encoding=encoding,
)
def image_generation( def image_generation(
self, self,
@ -163,7 +788,7 @@ class VertexLLM(BaseLLM):
} \ } \
"https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict" "https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict"
""" """
auth_header = self._ensure_access_token() auth_header, _ = self._ensure_access_token(credentials=None, project_id=None)
optional_params = optional_params or { optional_params = optional_params or {
"sampleCount": 1 "sampleCount": 1
} # default optional params } # default optional params
@ -222,3 +847,84 @@ class VertexLLM(BaseLLM):
model_response.data = _response_data model_response.data = _response_data
return model_response return model_response
class ModelResponseIterator:
def __init__(self, streaming_response):
self.streaming_response = streaming_response
self.response_iterator = iter(self.streaming_response)
def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
try:
processed_chunk = GenerateContentResponseBody(**chunk) # type: ignore
text = ""
tool_use: Optional[ChatCompletionToolCallChunk] = None
is_finished = False
finish_reason = ""
usage: Optional[ChatCompletionUsageBlock] = None
gemini_chunk = processed_chunk["candidates"][0]
if (
"content" in gemini_chunk
and "text" in gemini_chunk["content"]["parts"][0]
):
text = gemini_chunk["content"]["parts"][0]["text"]
if "finishReason" in gemini_chunk:
finish_reason = map_finish_reason(
finish_reason=gemini_chunk["finishReason"]
)
is_finished = True
if "usageMetadata" in processed_chunk:
usage = ChatCompletionUsageBlock(
prompt_tokens=processed_chunk["usageMetadata"]["promptTokenCount"],
completion_tokens=processed_chunk["usageMetadata"][
"candidatesTokenCount"
],
total_tokens=processed_chunk["usageMetadata"]["totalTokenCount"],
)
returned_chunk = GenericStreamingChunk(
text=text,
tool_use=tool_use,
is_finished=is_finished,
finish_reason=finish_reason,
usage=usage,
index=0,
)
return returned_chunk
except json.JSONDecodeError:
raise ValueError(f"Failed to decode JSON from chunk: {chunk}")
# Sync iterator
def __iter__(self):
return self
def __next__(self):
try:
chunk = next(self.response_iterator)
chunk = chunk.decode()
json_chunk = json.loads(chunk)
return self.chunk_parser(chunk=json_chunk)
except StopIteration:
raise StopIteration
except ValueError as e:
raise RuntimeError(f"Error parsing chunk: {e}")
# Async iterator
def __aiter__(self):
self.async_response_iterator = self.streaming_response.__aiter__()
return self
async def __anext__(self):
try:
chunk = await self.async_response_iterator.__anext__()
chunk = chunk.decode()
json_chunk = json.loads(chunk)
return self.chunk_parser(chunk=json_chunk)
except StopAsyncIteration:
raise StopAsyncIteration
except ValueError as e:
raise RuntimeError(f"Error parsing chunk: {e}")

View file

@ -329,12 +329,14 @@ async def acompletion(
or custom_llm_provider == "ollama_chat" or custom_llm_provider == "ollama_chat"
or custom_llm_provider == "replicate" or custom_llm_provider == "replicate"
or custom_llm_provider == "vertex_ai" or custom_llm_provider == "vertex_ai"
or custom_llm_provider == "vertex_ai_beta"
or custom_llm_provider == "gemini" or custom_llm_provider == "gemini"
or custom_llm_provider == "sagemaker" or custom_llm_provider == "sagemaker"
or custom_llm_provider == "anthropic" or custom_llm_provider == "anthropic"
or custom_llm_provider == "predibase" or custom_llm_provider == "predibase"
or custom_llm_provider == "bedrock" or custom_llm_provider == "bedrock"
or custom_llm_provider == "databricks" or custom_llm_provider == "databricks"
or custom_llm_provider == "clarifai"
or custom_llm_provider in litellm.openai_compatible_providers or custom_llm_provider in litellm.openai_compatible_providers
): # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all. ): # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all.
init_response = await loop.run_in_executor(None, func_with_context) init_response = await loop.run_in_executor(None, func_with_context)
@ -1875,6 +1877,42 @@ def completion(
) )
return response return response
response = model_response response = model_response
elif custom_llm_provider == "vertex_ai_beta":
vertex_ai_project = (
optional_params.pop("vertex_project", None)
or optional_params.pop("vertex_ai_project", None)
or litellm.vertex_project
or get_secret("VERTEXAI_PROJECT")
)
vertex_ai_location = (
optional_params.pop("vertex_location", None)
or optional_params.pop("vertex_ai_location", None)
or litellm.vertex_location
or get_secret("VERTEXAI_LOCATION")
)
vertex_credentials = (
optional_params.pop("vertex_credentials", None)
or optional_params.pop("vertex_ai_credentials", None)
or get_secret("VERTEXAI_CREDENTIALS")
)
new_params = deepcopy(optional_params)
response = vertex_chat_completion.completion( # type: ignore
model=model,
messages=messages,
model_response=model_response,
print_verbose=print_verbose,
optional_params=new_params,
litellm_params=litellm_params,
logger_fn=logger_fn,
encoding=encoding,
vertex_location=vertex_ai_location,
vertex_project=vertex_ai_project,
vertex_credentials=vertex_credentials,
logging_obj=logging,
acompletion=acompletion,
timeout=timeout,
)
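The new `vertex_ai_beta` branch above resolves project, location, and credentials from call kwargs, `litellm` module settings, or the `VERTEXAI_*` environment variables, then hands the request to the httpx-based `VertexLLM` client. A call sketch, assuming valid GCP credentials; the project and location values are placeholders:

```python
# Call sketch for the new routing branch; project/location are placeholders and
# application-default credentials (or vertex_credentials) are assumed.
import litellm

response = litellm.completion(
    model="vertex_ai_beta/gemini-1.5-pro",
    messages=[{"role": "user", "content": "Say hi"}],
    vertex_project="my-gcp-project",   # or set VERTEXAI_PROJECT
    vertex_location="us-central1",     # or set VERTEXAI_LOCATION
    # vertex_credentials="<service-account-json>",  # or set VERTEXAI_CREDENTIALS
)
print(response.choices[0].message.content)
```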
elif custom_llm_provider == "vertex_ai": elif custom_llm_provider == "vertex_ai":
vertex_ai_project = ( vertex_ai_project = (
optional_params.pop("vertex_project", None) optional_params.pop("vertex_project", None)
@ -1893,6 +1931,7 @@ def completion(
or optional_params.pop("vertex_ai_credentials", None) or optional_params.pop("vertex_ai_credentials", None)
or get_secret("VERTEXAI_CREDENTIALS") or get_secret("VERTEXAI_CREDENTIALS")
) )
new_params = deepcopy(optional_params) new_params = deepcopy(optional_params)
if "claude-3" in model: if "claude-3" in model:
model_response = vertex_ai_anthropic.completion( model_response = vertex_ai_anthropic.completion(

View file

@ -3347,6 +3347,24 @@
"litellm_provider": "deepinfra", "litellm_provider": "deepinfra",
"mode": "chat" "mode": "chat"
}, },
"deepinfra/meta-llama/Meta-Llama-3-8B-Instruct": {
"max_tokens": 8191,
"max_input_tokens": 8191,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000008,
"output_cost_per_token": 0.00000008,
"litellm_provider": "deepinfra",
"mode": "chat"
},
"deepinfra/meta-llama/Meta-Llama-3-70B-Instruct": {
"max_tokens": 8191,
"max_input_tokens": 8191,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000059,
"output_cost_per_token": 0.00000079,
"litellm_provider": "deepinfra",
"mode": "chat"
},
"deepinfra/01-ai/Yi-34B-200K": { "deepinfra/01-ai/Yi-34B-200K": {
"max_tokens": 4096, "max_tokens": 4096,
"max_input_tokens": 200000, "max_input_tokens": 200000,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +0,0 @@
"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[665],{30953:function(e,t,r){r.d(t,{GH$:function(){return n}});var l=r(64090);let n=e=>{let{color:t="currentColor",size:r=24,className:n,...s}=e;return l.createElement("svg",{viewBox:"0 0 24 24",xmlns:"http://www.w3.org/2000/svg",width:r,height:r,fill:t,...s,className:"remixicon "+(n||"")},l.createElement("path",{d:"M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM12 20C16.4183 20 20 16.4183 20 12C20 7.58172 16.4183 4 12 4C7.58172 4 4 7.58172 4 12C4 16.4183 7.58172 20 12 20ZM11.0026 16L6.75999 11.7574L8.17421 10.3431L11.0026 13.1716L16.6595 7.51472L18.0737 8.92893L11.0026 16Z"}))}}}]);

View file

@ -0,0 +1 @@
"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[665],{30953:function(e,t,r){r.d(t,{GH$:function(){return n}});var l=r(2265);let n=e=>{let{color:t="currentColor",size:r=24,className:n,...s}=e;return l.createElement("svg",{viewBox:"0 0 24 24",xmlns:"http://www.w3.org/2000/svg",width:r,height:r,fill:t,...s,className:"remixicon "+(n||"")},l.createElement("path",{d:"M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM12 20C16.4183 20 20 16.4183 20 12C20 7.58172 16.4183 4 12 4C7.58172 4 4 7.58172 4 12C4 16.4183 7.58172 20 12 20ZM11.0026 16L6.75999 11.7574L8.17421 10.3431L11.0026 13.1716L16.6595 7.51472L18.0737 8.92893L11.0026 16Z"}))}}}]);

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[165],{83155:function(e,t,n){(window.__NEXT_P=window.__NEXT_P||[]).push(["/_not-found",function(){return n(84032)}])},84032:function(e,t,n){"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"default",{enumerable:!0,get:function(){return i}}),n(86921);let o=n(3827);n(64090);let r={error:{fontFamily:'system-ui,"Segoe UI",Roboto,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji"',height:"100vh",textAlign:"center",display:"flex",flexDirection:"column",alignItems:"center",justifyContent:"center"},desc:{display:"inline-block"},h1:{display:"inline-block",margin:"0 20px 0 0",padding:"0 23px 0 0",fontSize:24,fontWeight:500,verticalAlign:"top",lineHeight:"49px"},h2:{fontSize:14,fontWeight:400,lineHeight:"49px",margin:0}};function i(){return(0,o.jsxs)(o.Fragment,{children:[(0,o.jsx)("title",{children:"404: This page could not be found."}),(0,o.jsx)("div",{style:r.error,children:(0,o.jsxs)("div",{children:[(0,o.jsx)("style",{dangerouslySetInnerHTML:{__html:"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}),(0,o.jsx)("h1",{className:"next-error-h1",style:r.h1,children:"404"}),(0,o.jsx)("div",{style:r.desc,children:(0,o.jsx)("h2",{style:r.h2,children:"This page could not be found."})})]})})]})}("function"==typeof t.default||"object"==typeof t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)}},function(e){e.O(0,[971,69,744],function(){return e(e.s=83155)}),_N_E=e.O()}]); (self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[165],{83155:function(e,t,n){(window.__NEXT_P=window.__NEXT_P||[]).push(["/_not-found",function(){return n(84032)}])},84032:function(e,t,n){"use strict";Object.defineProperty(t,"__esModule",{value:!0}),Object.defineProperty(t,"default",{enumerable:!0,get:function(){return i}}),n(86921);let o=n(57437);n(2265);let r={error:{fontFamily:'system-ui,"Segoe UI",Roboto,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji"',height:"100vh",textAlign:"center",display:"flex",flexDirection:"column",alignItems:"center",justifyContent:"center"},desc:{display:"inline-block"},h1:{display:"inline-block",margin:"0 20px 0 0",padding:"0 23px 0 0",fontSize:24,fontWeight:500,verticalAlign:"top",lineHeight:"49px"},h2:{fontSize:14,fontWeight:400,lineHeight:"49px",margin:0}};function i(){return(0,o.jsxs)(o.Fragment,{children:[(0,o.jsx)("title",{children:"404: This page could not be found."}),(0,o.jsx)("div",{style:r.error,children:(0,o.jsxs)("div",{children:[(0,o.jsx)("style",{dangerouslySetInnerHTML:{__html:"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}),(0,o.jsx)("h1",{className:"next-error-h1",style:r.h1,children:"404"}),(0,o.jsx)("div",{style:r.desc,children:(0,o.jsx)("h2",{style:r.h2,children:"This page could not be found."})})]})})]})}("function"==typeof t.default||"object"==typeof t.default&&null!==t.default)&&void 0===t.default.__esModule&&(Object.defineProperty(t.default,"__esModule",{value:!0}),Object.assign(t.default,t),e.exports=t.default)}},function(e){e.O(0,[971,69,744],function(){return e(e.s=83155)}),_N_E=e.O()}]);

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{11837:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=11837)}),_N_E=n.O()}]);

View file

@ -1 +0,0 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_c23dc8', '__Inter_Fallback_c23dc8'",fontStyle:"normal"},className:"__className_c23dc8"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{20723:function(e,s,l){Promise.resolve().then(l.bind(l,667))},667:function(e,s,l){"use strict";l.r(s),l.d(s,{default:function(){return _}});var t=l(57437),a=l(2265),r=l(47907),n=l(2179),i=l(18190),o=l(13810),u=l(10384),c=l(46453),d=l(71801),m=l(52273),h=l(42440),x=l(30953),j=l(777),p=l(37963),f=l(60620),g=l(1861);function _(){let[e]=f.Z.useForm(),s=(0,r.useSearchParams)();s.get("token");let l=s.get("id"),[_,Z]=(0,a.useState)(null),[w,b]=(0,a.useState)(""),[N,S]=(0,a.useState)(""),[k,y]=(0,a.useState)(null),[v,E]=(0,a.useState)(""),[F,I]=(0,a.useState)("");return(0,a.useEffect)(()=>{l&&(0,j.W_)(l).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let l=e.token,t=(0,p.o)(l);I(l),console.log("decoded:",t),Z(t.key),console.log("decoded user email:",t.user_email),S(t.user_email),y(t.user_id)})},[l]),(0,t.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,t.jsxs)(o.Z,{children:[(0,t.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,t.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,t.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,t.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,t.jsxs)(c.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,t.jsx)(u.Z,{children:"SSO is under the Enterprise Tirer."}),(0,t.jsx)(u.Z,{children:(0,t.jsx)(n.Z,{variant:"primary",className:"mb-2",children:(0,t.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,t.jsxs)(f.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",F,"formValues:",e),_&&F&&(e.user_email=N,k&&l&&(0,j.m_)(_,l,k,e.password).then(e=>{var s;let l="/ui/";console.log("redirecting to:",l+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+F),window.location.href=l}))},children:[(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(f.Z.Item,{label:"Email Address",name:"user_email",children:(0,t.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,t.jsx)(f.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,t.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,t.jsx)("div",{className:"mt-10",children:(0,t.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,505,684,777,971,69,744],function(){return e(e.s=20723)}),_N_E=e.O()}]);

View file

@ -1 +0,0 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{61994:function(e,s,l){Promise.resolve().then(l.bind(l,667))},667:function(e,s,l){"use strict";l.r(s),l.d(s,{default:function(){return _}});var t=l(3827),a=l(64090),r=l(47907),n=l(16450),i=l(18190),o=l(13810),u=l(10384),c=l(46453),d=l(71801),m=l(52273),h=l(42440),x=l(30953),j=l(777),p=l(37963),f=l(60620),g=l(1861);function _(){let[e]=f.Z.useForm(),s=(0,r.useSearchParams)();s.get("token");let l=s.get("id"),[_,Z]=(0,a.useState)(null),[w,b]=(0,a.useState)(""),[N,S]=(0,a.useState)(""),[k,y]=(0,a.useState)(null),[v,E]=(0,a.useState)(""),[F,I]=(0,a.useState)("");return(0,a.useEffect)(()=>{l&&(0,j.W_)(l).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let l=e.token,t=(0,p.o)(l);I(l),console.log("decoded:",t),Z(t.key),console.log("decoded user email:",t.user_email),S(t.user_email),y(t.user_id)})},[l]),(0,t.jsx)("div",{className:"mx-auto max-w-md mt-10",children:(0,t.jsxs)(o.Z,{children:[(0,t.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,t.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,t.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,t.jsx)(i.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,t.jsxs)(c.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,t.jsx)(u.Z,{children:"SSO is under the Enterprise Tirer."}),(0,t.jsx)(u.Z,{children:(0,t.jsx)(n.Z,{variant:"primary",className:"mb-2",children:(0,t.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,t.jsxs)(f.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",_,"token:",F,"formValues:",e),_&&F&&(e.user_email=N,k&&l&&(0,j.m_)(_,l,k,e.password).then(e=>{var s;let l="/ui/";console.log("redirecting to:",l+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id)+"&token="+F),window.location.href=l}))},children:[(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(f.Z.Item,{label:"Email Address",name:"user_email",children:(0,t.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,t.jsx)(f.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,t.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,t.jsx)("div",{className:"mt-10",children:(0,t.jsx)(g.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,294,684,777,971,69,744],function(){return e(e.s=61994)}),_N_E=e.O()}]);

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]); (self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{70377:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(70377)}),_N_E=e.O()}]);

View file

@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/f02cb03d96e276ef.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}(); !function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/63f65dbb14efd996.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var 
o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[45980,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-17b0c91edd3a24fe.js\",\"931\",\"static/chunks/app/page-bd882aee817406ff.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"48nWsJi-LJrUlOLzcK-Yz\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-887c75b16b85d4b4.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f593049e31b05aeb.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-8316d07d1f41e39f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-887c75b16b85d4b4.js" crossorigin="" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/63f65dbb14efd996.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[68101,[\"936\",\"static/chunks/2f6dbc85-cac2949a76539886.js\",\"505\",\"static/chunks/505-5ff3c318fddfa35c.js\",\"131\",\"static/chunks/131-cb6bfe24e23e121b.js\",\"684\",\"static/chunks/684-16b194c83a169f6d.js\",\"759\",\"static/chunks/759-c0083d8a782d300e.js\",\"777\",\"static/chunks/777-71fb78fdb4897cc3.js\",\"931\",\"static/chunks/app/page-8028473f1a04553d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/63f65dbb14efd996.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"sTvd1VbHSi_TBr1KiIpul\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, 
initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[45980,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-17b0c91edd3a24fe.js","931","static/chunks/app/page-bd882aee817406ff.js"],""] 3:I[68101,["936","static/chunks/2f6dbc85-cac2949a76539886.js","505","static/chunks/505-5ff3c318fddfa35c.js","131","static/chunks/131-cb6bfe24e23e121b.js","684","static/chunks/684-16b194c83a169f6d.js","759","static/chunks/759-c0083d8a782d300e.js","777","static/chunks/777-71fb78fdb4897cc3.js","931","static/chunks/app/page-8028473f1a04553d.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-17b0c91edd3a24fe.js","418","static/chunks/app/model_hub/page-4cb65c32467214b5.js"],""] 3:I[87494,["505","static/chunks/505-5ff3c318fddfa35c.js","131","static/chunks/131-cb6bfe24e23e121b.js","777","static/chunks/777-71fb78fdb4897cc3.js","418","static/chunks/app/model_hub/page-a1942d43573c82c3.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-17b0c91edd3a24fe.js","461","static/chunks/app/onboarding/page-664c7288e11fff5a.js"],""] 3:I[667,["665","static/chunks/3014691f-b24e8254c7593934.js","505","static/chunks/505-5ff3c318fddfa35c.js","684","static/chunks/684-16b194c83a169f6d.js","777","static/chunks/777-71fb78fdb4897cc3.js","461","static/chunks/app/onboarding/page-49a30e653b6ae929.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -11,7 +11,7 @@ model_list:
- model_name: fake-openai-endpoint - model_name: fake-openai-endpoint
litellm_params: litellm_params:
model: predibase/llama-3-8b-instruct model: predibase/llama-3-8b-instruct
api_base: "http://0.0.0.0:8081" api_base: "http://0.0.0.0:8000"
api_key: os.environ/PREDIBASE_API_KEY api_key: os.environ/PREDIBASE_API_KEY
tenant_id: os.environ/PREDIBASE_TENANT_ID tenant_id: os.environ/PREDIBASE_TENANT_ID
max_retries: 0 max_retries: 0
@ -55,7 +55,16 @@ model_list:
model: textembedding-gecko-multilingual@001 model: textembedding-gecko-multilingual@001
vertex_project: my-project-9d5c vertex_project: my-project-9d5c
vertex_location: us-central1 vertex_location: us-central1
- model_name: lbl/command-r-plus
litellm_params:
model: openai/lbl/command-r-plus
api_key: "os.environ/VLLM_API_KEY"
api_base: http://vllm-command:8000/v1
rpm: 1000
input_cost_per_token: 0
output_cost_per_token: 0
model_info:
max_input_tokens: 80920
assistant_settings: assistant_settings:
custom_llm_provider: openai custom_llm_provider: openai
litellm_params: litellm_params:
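The new `lbl/command-r-plus` entry above routes an OpenAI-compatible vLLM server through the proxy with a 1,000 RPM cap, zero per-token cost, and an 80,920-token input limit. A minimal sketch of calling it like any other proxy model, assuming the proxy runs locally on port 4000 with the example `sk-1234` master key used elsewhere in this commit:

```python
# Sketch: call the vLLM-backed deployment through the LiteLLM proxy.
# Assumes the proxy is reachable at http://0.0.0.0:4000 with master key "sk-1234".
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="lbl/command-r-plus",  # model_name from the config entry above
    messages=[{"role": "user", "content": "Summarize what a proxy model alias is."}],
)
print(response.choices[0].message.content)
```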

View file

@ -224,6 +224,7 @@ class LiteLLMRoutes(enum.Enum):
"/key/delete", "/key/delete",
"/global/spend/logs", "/global/spend/logs",
"/global/predict/spend/logs", "/global/predict/spend/logs",
"/sso/get/logout_url",
] ]
management_routes: List = [ # key management_routes: List = [ # key

View file

@ -32,7 +32,7 @@ def management_endpoint_wrapper(func):
if open_telemetry_logger is not None: if open_telemetry_logger is not None:
_http_request: Request = kwargs.get("http_request") _http_request: Request = kwargs.get("http_request")
if _http_request:
_route = _http_request.url.path _route = _http_request.url.path
_request_body: dict = await _read_request_body( _request_body: dict = await _read_request_body(
request=_http_request request=_http_request
@ -67,6 +67,7 @@ def management_endpoint_wrapper(func):
if open_telemetry_logger is not None: if open_telemetry_logger is not None:
_http_request: Request = kwargs.get("http_request") _http_request: Request = kwargs.get("http_request")
if _http_request:
_route = _http_request.url.path _route = _http_request.url.path
_request_body: dict = await _read_request_body( _request_body: dict = await _read_request_body(
request=_http_request request=_http_request
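Both hunks add a truthiness check before dereferencing the optional `http_request` kwarg, so OTEL span attributes are only attached when a request object was actually passed in. A minimal sketch of the guard pattern (the kwarg name mirrors the code above; everything else is illustrative):

```python
# Sketch of the None-guard added above: only read request attributes when present.
from typing import Optional


def extract_route(**kwargs) -> Optional[str]:
    _http_request = kwargs.get("http_request")  # may legitimately be None
    if _http_request:
        return _http_request.url.path  # safe: only dereferenced when set
    return None
```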

View file

@ -140,7 +140,7 @@ class _PROXY_AzureContentSafety(
response.choices[0], litellm.utils.Choices response.choices[0], litellm.utils.Choices
): ):
await self.test_violation( await self.test_violation(
content=response.choices[0].message.content, source="output" content=response.choices[0].message.content or "", source="output"
) )
# async def async_post_call_streaming_hook( # async def async_post_call_streaming_hook(
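The `or ""` fallback matters because `message.content` can be `None` (for example on a tool-call-only choice), and passing `None` into the content-safety check would otherwise fail. A one-line illustration of the same normalization:

```python
# Sketch: normalize optional message content to a string before moderation.
content = None  # e.g. a tool-call-only response has message.content == None
safe_content = content or ""  # the hunk above applies exactly this fallback
assert safe_content == ""
```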

View file

@ -79,10 +79,6 @@ async def add_litellm_data_to_request(
data["cache"][k] = v data["cache"][k] = v
verbose_proxy_logger.debug("receiving data: %s", data) verbose_proxy_logger.debug("receiving data: %s", data)
# users can pass in 'user' param to /chat/completions. Don't override it
if data.get("user", None) is None and user_api_key_dict.user_id is not None:
# if users are using user_api_key_auth, set `user` in `data`
data["user"] = user_api_key_dict.user_id
if "metadata" not in data: if "metadata" not in data:
data["metadata"] = {} data["metadata"] = {}
@ -108,6 +104,15 @@ async def add_litellm_data_to_request(
data["metadata"]["user_api_key_team_alias"] = getattr( data["metadata"]["user_api_key_team_alias"] = getattr(
user_api_key_dict, "team_alias", None user_api_key_dict, "team_alias", None
) )
# Team spend, budget - used by prometheus.py
data["metadata"]["user_api_key_team_max_budget"] = user_api_key_dict.team_max_budget
data["metadata"]["user_api_key_team_spend"] = user_api_key_dict.team_spend
# API Key spend, budget - used by prometheus.py
data["metadata"]["user_api_key_spend"] = user_api_key_dict.spend
data["metadata"]["user_api_key_max_budget"] = user_api_key_dict.max_budget
data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata
_headers = dict(request.headers) _headers = dict(request.headers)
_headers.pop( _headers.pop(
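The added keys expose key- and team-level spend and budget on request `metadata` so downstream loggers (the comments name `prometheus.py`) can emit them without extra database lookups. A hedged sketch of how a custom callback could read them, assuming the usual LiteLLM callback layout where metadata sits under `litellm_params` in the kwargs:

```python
# Sketch: a custom logger reading the spend/budget metadata added above.
# Assumes the documented LiteLLM custom-callback interface and kwargs layout.
from litellm.integrations.custom_logger import CustomLogger


class BudgetHeadroomLogger(CustomLogger):
    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        metadata = kwargs.get("litellm_params", {}).get("metadata", {}) or {}
        spend = metadata.get("user_api_key_spend")
        max_budget = metadata.get("user_api_key_max_budget")
        if spend is not None and max_budget:
            print(f"key budget used: {spend / max_budget:.1%}")
```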

View file

@ -14,17 +14,18 @@ model_list:
litellm_params: litellm_params:
model: openai/* model: openai/*
api_key: os.environ/OPENAI_API_KEY api_key: os.environ/OPENAI_API_KEY
- model_name: my-triton-model - model_name: mistral-embed
litellm_params: litellm_params:
model: triton/any" model: mistral/mistral-embed
api_base: https://exampleopenaiendpoint-production.up.railway.app/triton/embeddings
general_settings: general_settings:
master_key: sk-1234 master_key: sk-1234
litellm_settings: litellm_settings:
callbacks: ["otel"] success_callback: ["prometheus"]
failure_callback: ["prometheus"]
store_audit_logs: true store_audit_logs: true
turn_off_message_logging: true
redact_messages_in_exceptions: True redact_messages_in_exceptions: True
enforced_params: enforced_params:
- user - user
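With `mistral-embed` registered and Prometheus success/failure callbacks enabled, the proxy serves the model on its OpenAI-compatible embeddings route. A minimal sketch, assuming the proxy listens on port 4000 with the `sk-1234` master key and `MISTRAL_API_KEY` set in its environment:

```python
# Sketch: request embeddings from the mistral-embed entry through the proxy.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

embedding = client.embeddings.create(
    model="mistral-embed",
    input=["litellm proxy routes this to mistral/mistral-embed"],
)
print(len(embedding.data[0].embedding))
```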

View file

@ -879,6 +879,7 @@ async def user_api_key_auth(
## check for cache hit (In-Memory Cache) ## check for cache hit (In-Memory Cache)
original_api_key = api_key # (Patch: For DynamoDB Backwards Compatibility) original_api_key = api_key # (Patch: For DynamoDB Backwards Compatibility)
_user_role = None
if api_key.startswith("sk-"): if api_key.startswith("sk-"):
api_key = hash_token(token=api_key) api_key = hash_token(token=api_key)
valid_token: Optional[UserAPIKeyAuth] = user_api_key_cache.get_cache( # type: ignore valid_token: Optional[UserAPIKeyAuth] = user_api_key_cache.get_cache( # type: ignore
@ -1512,7 +1513,7 @@ async def user_api_key_auth(
): ):
return UserAPIKeyAuth( return UserAPIKeyAuth(
api_key=api_key, api_key=api_key,
user_role="app_owner", user_role=_user_role,
parent_otel_span=parent_otel_span, parent_otel_span=parent_otel_span,
**valid_token_dict, **valid_token_dict,
) )
@ -6649,7 +6650,7 @@ async def generate_key_fn(
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True: if litellm.store_audit_logs is True:
_updated_values = json.dumps(response) _updated_values = json.dumps(response, default=str)
asyncio.create_task( asyncio.create_task(
create_audit_log_for_update( create_audit_log_for_update(
request_data=LiteLLM_AuditLogs( request_data=LiteLLM_AuditLogs(
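Passing `default=str` in these audit-log hunks makes serialization tolerant of values `json` cannot encode natively, such as the `datetime` fields on key and team rows, instead of raising `TypeError`. A small worked example of the difference:

```python
# Worked example: why default=str is needed for audit payloads containing datetimes.
import json
from datetime import datetime

payload = {"token": "hashed-key", "expires": datetime(2024, 6, 13, 22, 7)}

try:
    json.dumps(payload)
except TypeError as e:
    print(f"without default=str: {e}")

print(json.dumps(payload, default=str))  # "expires" becomes "2024-06-13 22:07:00"
```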
@ -6754,10 +6755,10 @@ async def update_key_fn(
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True: if litellm.store_audit_logs is True:
_updated_values = json.dumps(data_json) _updated_values = json.dumps(data_json, default=str)
_before_value = existing_key_row.json(exclude_none=True) _before_value = existing_key_row.json(exclude_none=True)
_before_value = json.dumps(_before_value) _before_value = json.dumps(_before_value, default=str)
asyncio.create_task( asyncio.create_task(
create_audit_log_for_update( create_audit_log_for_update(
@ -6853,7 +6854,7 @@ async def delete_key_fn(
) )
key_row = key_row.json(exclude_none=True) key_row = key_row.json(exclude_none=True)
_key_row = json.dumps(key_row) _key_row = json.dumps(key_row, default=str)
asyncio.create_task( asyncio.create_task(
create_audit_log_for_update( create_audit_log_for_update(
@ -7057,6 +7058,7 @@ async def info_key_fn(
"/spend/keys", "/spend/keys",
tags=["Budget & Spend Tracking"], tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
) )
async def spend_key_fn(): async def spend_key_fn():
""" """
@ -7089,6 +7091,7 @@ async def spend_key_fn():
"/spend/users", "/spend/users",
tags=["Budget & Spend Tracking"], tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
) )
async def spend_user_fn( async def spend_user_fn(
user_id: Optional[str] = fastapi.Query( user_id: Optional[str] = fastapi.Query(
@ -7219,6 +7222,7 @@ async def view_spend_tags(
responses={ responses={
200: {"model": List[LiteLLM_SpendLogs]}, 200: {"model": List[LiteLLM_SpendLogs]},
}, },
include_in_schema=False,
) )
async def get_global_activity( async def get_global_activity(
start_date: Optional[str] = fastapi.Query( start_date: Optional[str] = fastapi.Query(
@ -7322,6 +7326,7 @@ async def get_global_activity(
responses={ responses={
200: {"model": List[LiteLLM_SpendLogs]}, 200: {"model": List[LiteLLM_SpendLogs]},
}, },
include_in_schema=False,
) )
async def get_global_activity_model( async def get_global_activity_model(
start_date: Optional[str] = fastapi.Query( start_date: Optional[str] = fastapi.Query(
@ -7468,6 +7473,7 @@ async def get_global_activity_model(
responses={ responses={
200: {"model": List[LiteLLM_SpendLogs]}, 200: {"model": List[LiteLLM_SpendLogs]},
}, },
include_in_schema=False,
) )
async def get_global_activity_exceptions_per_deployment( async def get_global_activity_exceptions_per_deployment(
model_group: str = fastapi.Query( model_group: str = fastapi.Query(
@ -7620,6 +7626,7 @@ async def get_global_activity_exceptions_per_deployment(
responses={ responses={
200: {"model": List[LiteLLM_SpendLogs]}, 200: {"model": List[LiteLLM_SpendLogs]},
}, },
include_in_schema=False,
) )
async def get_global_activity_exceptions( async def get_global_activity_exceptions(
model_group: str = fastapi.Query( model_group: str = fastapi.Query(
@ -7830,7 +7837,6 @@ async def get_global_spend_provider(
"/global/spend/report", "/global/spend/report",
tags=["Budget & Spend Tracking"], tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
responses={ responses={
200: {"model": List[LiteLLM_SpendLogs]}, 200: {"model": List[LiteLLM_SpendLogs]},
}, },
@ -8530,6 +8536,7 @@ async def global_spend_reset():
"/global/spend/logs", "/global/spend/logs",
tags=["Budget & Spend Tracking"], tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
) )
async def global_spend_logs( async def global_spend_logs(
api_key: str = fastapi.Query( api_key: str = fastapi.Query(
@ -8575,6 +8582,7 @@ async def global_spend_logs(
"/global/spend", "/global/spend",
tags=["Budget & Spend Tracking"], tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
) )
async def global_spend(): async def global_spend():
""" """
@ -8601,6 +8609,7 @@ async def global_spend():
"/global/spend/keys", "/global/spend/keys",
tags=["Budget & Spend Tracking"], tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
) )
async def global_spend_keys( async def global_spend_keys(
limit: int = fastapi.Query( limit: int = fastapi.Query(
@ -8628,6 +8637,7 @@ async def global_spend_keys(
"/global/spend/teams", "/global/spend/teams",
tags=["Budget & Spend Tracking"], tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
) )
async def global_spend_per_team(): async def global_spend_per_team():
""" """
@ -8752,6 +8762,7 @@ async def global_view_all_end_users():
"/global/spend/end_users", "/global/spend/end_users",
tags=["Budget & Spend Tracking"], tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
) )
async def global_spend_end_users(data: Optional[GlobalEndUsersSpend] = None): async def global_spend_end_users(data: Optional[GlobalEndUsersSpend] = None):
""" """
@ -8804,6 +8815,7 @@ LIMIT 100
"/global/spend/models", "/global/spend/models",
tags=["Budget & Spend Tracking"], tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
) )
async def global_spend_models( async def global_spend_models(
limit: int = fastapi.Query( limit: int = fastapi.Query(
@ -8832,6 +8844,7 @@ async def global_spend_models(
"/global/predict/spend/logs", "/global/predict/spend/logs",
tags=["Budget & Spend Tracking"], tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
) )
async def global_predict_spend_logs(request: Request): async def global_predict_spend_logs(request: Request):
from enterprise.utils import _forecast_daily_cost from enterprise.utils import _forecast_daily_cost
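The repeated `include_in_schema=False` additions in the surrounding hunks hide these spend and analytics routes from the generated OpenAPI schema (and therefore from the `/docs` page) while leaving them callable. This is standard FastAPI behavior, sketched below on a hypothetical route:

```python
# Sketch: include_in_schema=False keeps a route callable but out of the OpenAPI docs.
from fastapi import FastAPI

app = FastAPI()


@app.get("/internal/spend/keys", include_in_schema=False)  # hypothetical path
async def internal_spend_keys():
    # Still reachable over HTTP, just absent from app.openapi()["paths"].
    return {"status": "ok"}
```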
@ -8863,7 +8876,7 @@ async def new_user(data: NewUserRequest):
- organization_id: Optional[str] - specify the org a user belongs to. - organization_id: Optional[str] - specify the org a user belongs to.
- user_email: Optional[str] - Specify a user email. - user_email: Optional[str] - Specify a user email.
- send_invite_email: Optional[bool] - Specify if an invite email should be sent. - send_invite_email: Optional[bool] - Specify if an invite email should be sent.
- user_role: Optional[str] - Specify a user role - "admin", "app_owner", "app_user" - user_role: Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/litellm/proxy/_types.py#L20`
- max_budget: Optional[float] - Specify max budget for a given user. - max_budget: Optional[float] - Specify max budget for a given user.
- models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models) - models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
- tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute) - tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute)
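The updated docstring enumerates the proxy's role vocabulary (`proxy_admin`, `internal_user`, and so on) instead of the older `admin`/`app_owner` names. A hedged sketch of creating a user with one of these roles over the documented `/user/new` endpoint; the base URL and master key are assumptions, the fields come from the parameter list above:

```python
# Sketch: create an internal_user via the proxy's /user/new endpoint.
import requests

resp = requests.post(
    "http://0.0.0.0:4000/user/new",
    headers={"Authorization": "Bearer sk-1234"},
    json={
        "user_email": "dev@example.com",
        "user_role": "internal_user",
        "max_budget": 10.0,
    },
    timeout=10,
)
print(resp.json())
```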
@ -9947,16 +9960,18 @@ async def new_team(
""" """
Allow users to create a new team. Apply user permissions to their team. Allow users to create a new team. Apply user permissions to their team.
[ASK FOR HELP](https://github.com/BerriAI/litellm/issues) 👉 [Detailed Doc on setting team budgets](https://docs.litellm.ai/docs/proxy/team_budgets)
Parameters: Parameters:
- team_alias: Optional[str] - User defined team alias - team_alias: Optional[str] - User defined team alias
- team_id: Optional[str] - The team id of the user. If none passed, we'll generate it. - team_id: Optional[str] - The team id of the user. If none passed, we'll generate it.
- members_with_roles: List[{"role": "admin" or "user", "user_id": "<user-id>"}] - A list of users and their roles in the team. Get user_id when making a new user via `/user/new`. - members_with_roles: List[{"role": "admin" or "user", "user_id": "<user-id>"}] - A list of users and their roles in the team. Get user_id when making a new user via `/user/new`.
- metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" } - metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"extra_info": "some info"}
- tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit - tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
- rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit - rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
- max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget - max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
- budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
- models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed. - models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
- blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id. - blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.
@ -9981,6 +9996,21 @@ async def new_team(
{"role": "user", "user_id": "user-2434"}] {"role": "user", "user_id": "user-2434"}]
}' }'
```
```
curl --location 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"team_alias": "QA Prod Bot",
"max_budget": 0.000000001,
"budget_duration": "1d"
}'
``` ```
""" """
global prisma_client global prisma_client
@ -10110,7 +10140,8 @@ async def new_team(
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True: if litellm.store_audit_logs is True:
_updated_values = complete_team_data.json(exclude_none=True) _updated_values = complete_team_data.json(exclude_none=True)
_updated_values = json.dumps(_updated_values)
_updated_values = json.dumps(_updated_values, default=str)
asyncio.create_task( asyncio.create_task(
create_audit_log_for_update( create_audit_log_for_update(
@ -10174,6 +10205,7 @@ async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
@management_endpoint_wrapper @management_endpoint_wrapper
async def update_team( async def update_team(
data: UpdateTeamRequest, data: UpdateTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header( litellm_changed_by: Optional[str] = Header(
None, None,
@ -10192,6 +10224,7 @@ async def update_team(
- tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit - tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
- rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit - rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
- max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget - max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
- budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
- models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed. - models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
- blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id. - blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.
@ -10209,6 +10242,20 @@ async def update_team(
"tpm_limit": 100 "tpm_limit": 100
}' }'
``` ```
Example - Update Team `max_budget` budget
```
curl --location 'http://0.0.0.0:8000/team/update' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data-raw '{
"team_id": "litellm-test-client-id-new",
"max_budget": 10
}'
```
""" """
global prisma_client global prisma_client
@ -10248,8 +10295,8 @@ async def update_team(
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True: if litellm.store_audit_logs is True:
_before_value = existing_team_row.json(exclude_none=True) _before_value = existing_team_row.json(exclude_none=True)
_before_value = json.dumps(_before_value) _before_value = json.dumps(_before_value, default=str)
_after_value: str = json.dumps(updated_kv) _after_value: str = json.dumps(updated_kv, default=str)
asyncio.create_task( asyncio.create_task(
create_audit_log_for_update( create_audit_log_for_update(
@ -11408,7 +11455,7 @@ async def model_info_v2(
for _model in all_models: for _model in all_models:
# provided model_info in config.yaml # provided model_info in config.yaml
model_info = _model.get("model_info", {}) model_info = _model.get("model_info", {})
if debug == True: if debug is True:
_openai_client = "None" _openai_client = "None"
if llm_router is not None: if llm_router is not None:
_openai_client = ( _openai_client = (
@ -11433,7 +11480,7 @@ async def model_info_v2(
litellm_model = litellm_params.get("model", None) litellm_model = litellm_params.get("model", None)
try: try:
litellm_model_info = litellm.get_model_info(model=litellm_model) litellm_model_info = litellm.get_model_info(model=litellm_model)
except: except Exception:
litellm_model_info = {} litellm_model_info = {}
# 3rd pass on the model, try seeing if we can find model but without the "/" in model cost map # 3rd pass on the model, try seeing if we can find model but without the "/" in model cost map
if litellm_model_info == {}: if litellm_model_info == {}:
@ -11444,8 +11491,10 @@ async def model_info_v2(
if len(split_model) > 0: if len(split_model) > 0:
litellm_model = split_model[-1] litellm_model = split_model[-1]
try: try:
litellm_model_info = litellm.get_model_info(model=litellm_model) litellm_model_info = litellm.get_model_info(
except: model=litellm_model, custom_llm_provider=split_model[0]
)
except Exception:
litellm_model_info = {} litellm_model_info = {}
for k, v in litellm_model_info.items(): for k, v in litellm_model_info.items():
if k not in model_info: if k not in model_info:
@ -11956,7 +12005,9 @@ async def model_info_v1(
if len(split_model) > 0: if len(split_model) > 0:
litellm_model = split_model[-1] litellm_model = split_model[-1]
try: try:
litellm_model_info = litellm.get_model_info(model=litellm_model) litellm_model_info = litellm.get_model_info(
model=litellm_model, custom_llm_provider=split_model[0]
)
except: except:
litellm_model_info = {} litellm_model_info = {}
for k, v in litellm_model_info.items(): for k, v in litellm_model_info.items():
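Both `model_info_v2` and `model_info_v1` now retry the cost-map lookup with the provider prefix split off and passed as `custom_llm_provider`, which resolves deployments whose bare model name alone is ambiguous. A minimal sketch of that lookup; the model name is illustrative, and the try/except mirrors the proxy's fall-back to an empty dict:

```python
# Sketch: resolving model metadata with an explicit provider, as the hunks above do.
import litellm

full_name = "azure/gpt-4-0613"  # illustrative config value
provider, _, bare_model = full_name.partition("/")

try:
    info = litellm.get_model_info(model=bare_model, custom_llm_provider=provider)
    print(info.get("max_input_tokens"), info.get("litellm_provider"))
except Exception:
    info = {}  # keep going with no metadata, like the endpoint does
```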
@ -12223,6 +12274,7 @@ async def alerting_settings(
"/queue/chat/completions", "/queue/chat/completions",
tags=["experimental"], tags=["experimental"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
) )
async def async_queue_request( async def async_queue_request(
request: Request, request: Request,
@ -12334,18 +12386,10 @@ async def async_queue_request(
) )
@router.get(
"/ollama_logs", dependencies=[Depends(user_api_key_auth)], tags=["experimental"]
)
async def retrieve_server_log(request: Request):
filepath = os.path.expanduser("~/.ollama/logs/server.log")
return FileResponse(filepath)
#### LOGIN ENDPOINTS #### #### LOGIN ENDPOINTS ####
@app.get("/sso/key/generate", tags=["experimental"]) @app.get("/sso/key/generate", tags=["experimental"], include_in_schema=False)
async def google_login(request: Request): async def google_login(request: Request):
""" """
Create Proxy API Keys using Google Workspace SSO. Requires setting PROXY_BASE_URL in .env Create Proxy API Keys using Google Workspace SSO. Requires setting PROXY_BASE_URL in .env
@ -12939,7 +12983,7 @@ def get_image():
return FileResponse(logo_path, media_type="image/jpeg") return FileResponse(logo_path, media_type="image/jpeg")
@app.get("/sso/callback", tags=["experimental"]) @app.get("/sso/callback", tags=["experimental"], include_in_schema=False)
async def auth_callback(request: Request): async def auth_callback(request: Request):
"""Verify login""" """Verify login"""
global general_settings, ui_access_mode, premium_user global general_settings, ui_access_mode, premium_user
@ -13244,6 +13288,7 @@ async def auth_callback(request: Request):
tags=["Invite Links"], tags=["Invite Links"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
response_model=InvitationModel, response_model=InvitationModel,
include_in_schema=False,
) )
async def new_invitation( async def new_invitation(
data: InvitationNew, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth) data: InvitationNew, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth)
@ -13308,6 +13353,7 @@ async def new_invitation(
tags=["Invite Links"], tags=["Invite Links"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
response_model=InvitationModel, response_model=InvitationModel,
include_in_schema=False,
) )
async def invitation_info( async def invitation_info(
invitation_id: str, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth) invitation_id: str, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth)
@ -13359,6 +13405,7 @@ async def invitation_info(
tags=["Invite Links"], tags=["Invite Links"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
response_model=InvitationModel, response_model=InvitationModel,
include_in_schema=False,
) )
async def invitation_update( async def invitation_update(
data: InvitationUpdate, data: InvitationUpdate,
@ -13419,6 +13466,7 @@ async def invitation_update(
tags=["Invite Links"], tags=["Invite Links"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
response_model=InvitationModel, response_model=InvitationModel,
include_in_schema=False,
) )
async def invitation_delete( async def invitation_delete(
data: InvitationDelete, data: InvitationDelete,
@ -13471,6 +13519,7 @@ async def invitation_delete(
"/config/update", "/config/update",
tags=["config.yaml"], tags=["config.yaml"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
) )
async def update_config(config_info: ConfigYAML): async def update_config(config_info: ConfigYAML):
""" """
@ -13628,6 +13677,7 @@ Keep it more precise, to prevent overwrite other values unintentially
"/config/field/update", "/config/field/update",
tags=["config.yaml"], tags=["config.yaml"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
) )
async def update_config_general_settings( async def update_config_general_settings(
data: ConfigFieldUpdate, data: ConfigFieldUpdate,
@ -13706,6 +13756,7 @@ async def update_config_general_settings(
tags=["config.yaml"], tags=["config.yaml"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
response_model=ConfigFieldInfo, response_model=ConfigFieldInfo,
include_in_schema=False,
) )
async def get_config_general_settings( async def get_config_general_settings(
field_name: str, field_name: str,
@ -13766,6 +13817,7 @@ async def get_config_general_settings(
"/config/list", "/config/list",
tags=["config.yaml"], tags=["config.yaml"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
) )
async def get_config_list( async def get_config_list(
config_type: Literal["general_settings"], config_type: Literal["general_settings"],
@ -13842,6 +13894,7 @@ async def get_config_list(
"/config/field/delete", "/config/field/delete",
tags=["config.yaml"], tags=["config.yaml"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
) )
async def delete_config_general_settings( async def delete_config_general_settings(
data: ConfigFieldDelete, data: ConfigFieldDelete,
@ -14097,6 +14150,7 @@ async def get_config():
"/config/yaml", "/config/yaml",
tags=["config.yaml"], tags=["config.yaml"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
) )
async def config_yaml_endpoint(config_info: ConfigYAML): async def config_yaml_endpoint(config_info: ConfigYAML):
""" """
@ -14743,6 +14797,22 @@ async def cache_flushall():
) )
@router.get(
"/get/litellm_model_cost_map",
include_in_schema=False,
dependencies=[Depends(user_api_key_auth)],
)
async def get_litellm_model_cost_map():
try:
_model_cost_map = litellm.model_cost
return _model_cost_map
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Internal Server Error ({str(e)})",
)
@router.get("/", dependencies=[Depends(user_api_key_auth)]) @router.get("/", dependencies=[Depends(user_api_key_auth)])
async def home(request: Request): async def home(request: Request):
return "LiteLLM: RUNNING" return "LiteLLM: RUNNING"

View file

@ -1,4 +1,4 @@
from typing import Optional, List, Any, Literal, Union, TYPE_CHECKING from typing import Optional, List, Any, Literal, Union, TYPE_CHECKING, Tuple
import os import os
import subprocess import subprocess
import hashlib import hashlib
@ -2103,14 +2103,32 @@ def get_logging_payload(
raise e raise e
def _duration_in_seconds(duration: str): def _extract_from_regex(duration: str) -> Tuple[int, str]:
match = re.match(r"(\d+)([smhd]?)", duration) match = re.match(r"(\d+)(mo|[smhd]?)", duration)
if not match: if not match:
raise ValueError("Invalid duration format") raise ValueError("Invalid duration format")
value, unit = match.groups() value, unit = match.groups()
value = int(value) value = int(value)
return value, unit
def _duration_in_seconds(duration: str) -> int:
"""
Parameters:
- duration:
- "<number>s" - seconds
- "<number>m" - minutes
- "<number>h" - hours
- "<number>d" - days
- "<number>mo" - months
Returns the time in seconds until the budget needs to be reset
"""
value, unit = _extract_from_regex(duration=duration)
if unit == "s": if unit == "s":
return value return value
elif unit == "m": elif unit == "m":
@ -2119,6 +2137,22 @@ def _duration_in_seconds(duration: str):
return value * 3600 return value * 3600
elif unit == "d": elif unit == "d":
return value * 86400 return value * 86400
elif unit == "mo":
now = time.time()
current_time = datetime.fromtimestamp(now)
# Calculate the first day of the next month
if current_time.month == 12:
next_month = datetime(year=current_time.year + 1, month=1, day=1)
else:
next_month = datetime(
year=current_time.year, month=current_time.month + value, day=1
)
# Calculate the duration until the first day of the next month
duration_until_next_month = next_month - current_time
return int(duration_until_next_month.total_seconds())
else: else:
raise ValueError("Unsupported duration unit") raise ValueError("Unsupported duration unit")

View file

@ -3618,6 +3618,7 @@ class Router:
except Exception: except Exception:
model_info = None model_info = None
# get llm provider # get llm provider
model, llm_provider = "", ""
try: try:
model, llm_provider, _, _ = litellm.get_llm_provider( model, llm_provider, _, _ = litellm.get_llm_provider(
model=litellm_params.model, model=litellm_params.model,

View file

@ -503,13 +503,35 @@ async def test_async_vertexai_streaming_response():
# asyncio.run(test_async_vertexai_streaming_response()) # asyncio.run(test_async_vertexai_streaming_response())
def test_gemini_pro_vision(): @pytest.mark.parametrize("provider", ["vertex_ai"]) # "vertex_ai_beta"
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_gemini_pro_vision(provider, sync_mode):
try: try:
load_vertex_ai_credentials() load_vertex_ai_credentials()
litellm.set_verbose = True litellm.set_verbose = True
litellm.num_retries = 3 litellm.num_retries = 3
if sync_mode:
resp = litellm.completion( resp = litellm.completion(
model="vertex_ai/gemini-1.5-flash-preview-0514", model="{}/gemini-1.5-flash-preview-0514".format(provider),
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "Whats in this image?"},
{
"type": "image_url",
"image_url": {
"url": "gs://cloud-samples-data/generative-ai/image/boats.jpeg"
},
},
],
}
],
)
else:
resp = await litellm.acompletion(
model="{}/gemini-1.5-flash-preview-0514".format(provider),
messages=[ messages=[
{ {
"role": "user", "role": "user",
@ -532,6 +554,8 @@ def test_gemini_pro_vision():
# DO Not DELETE this ASSERT # DO Not DELETE this ASSERT
# Google counts the prompt tokens for us, we should ensure we use the tokens from the original response # Google counts the prompt tokens for us, we should ensure we use the tokens from the original response
assert prompt_tokens == 263 # the gemini api returns 263 to us assert prompt_tokens == 263 # the gemini api returns 263 to us
# assert False
except litellm.RateLimitError as e: except litellm.RateLimitError as e:
pass pass
except Exception as e: except Exception as e:
@ -591,9 +615,111 @@ def test_gemini_pro_vision_base64():
pytest.fail(f"An exception occurred - {str(e)}") pytest.fail(f"An exception occurred - {str(e)}")
@pytest.mark.parametrize("sync_mode", [True, False]) @pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
@pytest.mark.parametrize("provider", ["vertex_ai_beta"]) # "vertex_ai",
@pytest.mark.parametrize("sync_mode", [True]) # "vertex_ai",
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_gemini_pro_function_calling(sync_mode): async def test_gemini_pro_function_calling_httpx(provider, sync_mode):
try:
load_vertex_ai_credentials()
litellm.set_verbose = True
messages = [
{
"role": "system",
"content": "Your name is Litellm Bot, you are a helpful assistant",
},
# User asks for their name and weather in San Francisco
{
"role": "user",
"content": "Hello, what is your name and can you tell me the weather?",
},
]
tools = [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
}
},
"required": ["location"],
},
},
}
]
data = {
"model": "{}/gemini-1.5-pro".format(provider),
"messages": messages,
"tools": tools,
"tool_choice": "required",
}
if sync_mode:
response = litellm.completion(**data)
else:
response = await litellm.acompletion(**data)
print(f"response: {response}")
assert response.choices[0].message.tool_calls[0].function.arguments is not None
assert isinstance(
response.choices[0].message.tool_calls[0].function.arguments, str
)
except litellm.RateLimitError as e:
pass
except Exception as e:
if "429 Quota exceeded" in str(e):
pass
else:
pytest.fail("An unexpected exception occurred - {}".format(str(e)))
@pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
@pytest.mark.parametrize("provider", ["vertex_ai_beta"]) # "vertex_ai",
@pytest.mark.asyncio
async def test_gemini_pro_json_schema_httpx(provider):
load_vertex_ai_credentials()
litellm.set_verbose = True
messages = [
{
"role": "user",
"content": """
List 5 popular cookie recipes.
Using this JSON schema:
Recipe = {"recipe_name": str}
Return a `list[Recipe]`
""",
}
]
response = completion(
model="vertex_ai_beta/gemini-1.5-flash-preview-0514",
messages=messages,
response_format={"type": "json_object"},
)
assert response.choices[0].message.content is not None
response_json = json.loads(response.choices[0].message.content)
assert isinstance(response_json, dict) or isinstance(response_json, list)
@pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
@pytest.mark.parametrize("sync_mode", [True])
@pytest.mark.parametrize("provider", ["vertex_ai"])
@pytest.mark.asyncio
async def test_gemini_pro_function_calling(provider, sync_mode):
try: try:
load_vertex_ai_credentials() load_vertex_ai_credentials()
litellm.set_verbose = True litellm.set_verbose = True
@ -655,7 +781,7 @@ async def test_gemini_pro_function_calling(sync_mode):
] ]
data = { data = {
"model": "vertex_ai/gemini-1.5-pro-preview-0514", "model": "{}/gemini-1.5-pro-preview-0514".format(provider),
"messages": messages, "messages": messages,
"tools": tools, "tools": tools,
} }
@ -810,14 +936,24 @@ def test_vertexai_embedding():
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
@pytest.mark.skip(
reason="new test - works locally running into vertex version issues on ci/cd"
)
def test_vertexai_embedding_embedding_latest(): def test_vertexai_embedding_embedding_latest():
try: try:
load_vertex_ai_credentials() load_vertex_ai_credentials()
litellm.set_verbose = True litellm.set_verbose = True
response = embedding( response = embedding(
model="vertex_ai/text-embedding-004", model="vertex_ai/text-embedding-004",
input=["good morning from litellm", "this is another item"], input=["hi"],
dimensions=1,
auto_truncate=True,
task_type="RETRIEVAL_QUERY",
) )
assert len(response.data[0]["embedding"]) == 1
assert response.usage.prompt_tokens > 0
print(f"response:", response) print(f"response:", response)
except litellm.RateLimitError as e: except litellm.RateLimitError as e:
pass pass

View file

@ -220,13 +220,13 @@ def test_completion_bedrock_claude_sts_oidc_auth():
aws_web_identity_token = "oidc/circleci_v2/" aws_web_identity_token = "oidc/circleci_v2/"
aws_region_name = os.environ["AWS_REGION_NAME"] aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"] # aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
# TODO: This is using David's IAM role, we should use Litellm's IAM role eventually # TODO: This is using ai.moda's IAM role, we should use LiteLLM's IAM role eventually
aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci" aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci"
try: try:
litellm.set_verbose = True litellm.set_verbose = True
response = completion( response_1 = completion(
model="bedrock/anthropic.claude-3-haiku-20240307-v1:0", model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
messages=messages, messages=messages,
max_tokens=10, max_tokens=10,
@ -236,8 +236,40 @@ def test_completion_bedrock_claude_sts_oidc_auth():
aws_role_name=aws_role_name, aws_role_name=aws_role_name,
aws_session_name="my-test-session", aws_session_name="my-test-session",
) )
# Add any assertions here to check the response print(response_1)
print(response) assert len(response_1.choices) > 0
assert len(response_1.choices[0].message.content) > 0
# This second call is to verify that the cache isn't breaking anything
response_2 = completion(
model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
messages=messages,
max_tokens=5,
temperature=0.2,
aws_region_name=aws_region_name,
aws_web_identity_token=aws_web_identity_token,
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
)
print(response_2)
assert len(response_2.choices) > 0
assert len(response_2.choices[0].message.content) > 0
# This third call is to verify that the cache isn't used for a different region
response_3 = completion(
model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
messages=messages,
max_tokens=6,
temperature=0.3,
aws_region_name="us-east-1",
aws_web_identity_token=aws_web_identity_token,
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
)
print(response_3)
assert len(response_3.choices) > 0
assert len(response_3.choices[0].message.content) > 0
except RateLimitError: except RateLimitError:
pass pass
except Exception as e: except Exception as e:
@ -255,7 +287,7 @@ def test_completion_bedrock_httpx_command_r_sts_oidc_auth():
aws_web_identity_token = "oidc/circleci_v2/" aws_web_identity_token = "oidc/circleci_v2/"
aws_region_name = os.environ["AWS_REGION_NAME"] aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"] # aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
# TODO: This is using David's IAM role, we should use Litellm's IAM role eventually # TODO: This is using ai.moda's IAM role, we should use LiteLLM's IAM role eventually
aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci" aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci"
try: try:

View file

@ -7,6 +7,7 @@ sys.path.insert(
) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
import litellm import litellm
from litellm import get_model_info from litellm import get_model_info
import pytest
def test_get_model_info_simple_model_name(): def test_get_model_info_simple_model_name():
@ -23,3 +24,16 @@ def test_get_model_info_custom_llm_with_model_name():
""" """
model = "anthropic/claude-3-opus-20240229" model = "anthropic/claude-3-opus-20240229"
litellm.get_model_info(model) litellm.get_model_info(model)
def test_get_model_info_custom_llm_with_same_name_vllm():
"""
Tests that get_model_info fails when {custom_llm_provider}/{model_name} is given but the model is only mapped under a different provider (e.g. an OpenAI-compatible vLLM deployment reusing a Cohere model name)
"""
model = "command-r-plus"
provider = "openai" # vllm is openai-compatible
try:
litellm.get_model_info(model, custom_llm_provider=provider)
pytest.fail("Expected get model info to fail for an unmapped model/provider")
except Exception:
pass
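For context, a hedged sketch of the behaviour this test pins down, using the `custom_llm_provider` parameter that `get_model_info` gains later in this diff:

```python
import litellm

# Provider inferred from the model name matches the cost-map entry, so info is returned.
info = litellm.get_model_info("gpt-3.5-turbo")
print(info["max_tokens"], info["input_cost_per_token"])

# A mismatched provider hint (an OpenAI-compatible vLLM server reusing a Cohere
# model name) now raises instead of silently returning the Cohere pricing.
try:
    litellm.get_model_info("command-r-plus", custom_llm_provider="openai")
except Exception as e:
    print("unmapped model/provider combination:", type(e).__name__)
```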

View file

@ -2217,6 +2217,7 @@ async def test_create_update_team(prisma_client):
tpm_limit=30, tpm_limit=30,
rpm_limit=30, rpm_limit=30,
), ),
http_request=Request(scope={"type": "http"}),
user_api_key_dict=UserAPIKeyAuth( user_api_key_dict=UserAPIKeyAuth(
user_role=LitellmUserRoles.PROXY_ADMIN, user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234", api_key="sk-1234",

View file

@ -81,7 +81,7 @@ def test_async_fallbacks(caplog):
# Define the expected log messages # Define the expected log messages
# - error request, falling back notice, success notice # - error request, falling back notice, success notice
expected_logs = [ expected_logs = [
"litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception AuthenticationError: OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m", "litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception litellm.AuthenticationError: AuthenticationError: OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m",
"Falling back to model_group = azure/gpt-3.5-turbo", "Falling back to model_group = azure/gpt-3.5-turbo",
"litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m", "litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",
"Successful fallback b/w models.", "Successful fallback b/w models.",

View file

@ -557,7 +557,13 @@ async def test_completion_predibase_streaming(sync_mode):
print(f"complete_response: {complete_response}") print(f"complete_response: {complete_response}")
except litellm.Timeout as e: except litellm.Timeout as e:
pass pass
except litellm.InternalServerError as e:
pass
except Exception as e: except Exception as e:
print("ERROR class", e.__class__)
print("ERROR message", e)
print("ERROR traceback", traceback.format_exc())
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
@ -1029,7 +1035,8 @@ def test_completion_claude_stream_bad_key():
# test_completion_replicate_stream() # test_completion_replicate_stream()
def test_vertex_ai_stream(): @pytest.mark.parametrize("provider", ["vertex_ai"]) # "vertex_ai_beta"
def test_vertex_ai_stream(provider):
from litellm.tests.test_amazing_vertex_completion import load_vertex_ai_credentials from litellm.tests.test_amazing_vertex_completion import load_vertex_ai_credentials
load_vertex_ai_credentials() load_vertex_ai_credentials()
@ -1042,7 +1049,7 @@ def test_vertex_ai_stream():
try: try:
print("making request", model) print("making request", model)
response = completion( response = completion(
model=model, model="{}/{}".format(provider, model),
messages=[ messages=[
{"role": "user", "content": "write 10 line code code for saying hi"} {"role": "user", "content": "write 10 line code code for saying hi"}
], ],

View file

@ -3,6 +3,7 @@ from unittest import mock
from dotenv import load_dotenv from dotenv import load_dotenv
import copy import copy
from datetime import datetime
load_dotenv() load_dotenv()
import os import os
@ -25,6 +26,7 @@ from litellm.utils import (
get_max_tokens, get_max_tokens,
get_supported_openai_params, get_supported_openai_params,
) )
from litellm.proxy.utils import _duration_in_seconds, _extract_from_regex
# Assuming your trim_messages, shorten_message_to_fit_limit, and get_token_count functions are all in a module named 'message_utils' # Assuming your trim_messages, shorten_message_to_fit_limit, and get_token_count functions are all in a module named 'message_utils'
@ -395,3 +397,89 @@ def test_get_supported_openai_params() -> None:
# Unmapped provider # Unmapped provider
assert get_supported_openai_params("nonexistent") is None assert get_supported_openai_params("nonexistent") is None
def test_redact_msgs_from_logs():
"""
Tests that turn_off_message_logging does not modify the response_obj
On the proxy, some users were seeing the redaction impact client-side responses
"""
from litellm.litellm_core_utils.redact_messages import (
redact_message_input_output_from_logging,
)
from litellm.utils import Logging
litellm.turn_off_message_logging = True
response_obj = litellm.ModelResponse(
choices=[
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "I'm LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner.",
"role": "assistant",
},
}
]
)
_redacted_response_obj = redact_message_input_output_from_logging(
result=response_obj,
litellm_logging_obj=Logging(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "hi"}],
stream=False,
call_type="acompletion",
litellm_call_id="1234",
start_time=datetime.now(),
function_id="1234",
),
)
# Assert the response_obj content is NOT modified
assert (
response_obj.choices[0].message.content
== "I'm LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner."
)
litellm.turn_off_message_logging = False
print("Test passed")
@pytest.mark.parametrize(
"duration, unit",
[("7s", "s"), ("7m", "m"), ("7h", "h"), ("7d", "d"), ("7mo", "mo")],
)
def test_extract_from_regex(duration, unit):
value, _unit = _extract_from_regex(duration=duration)
assert value == 7
assert _unit == unit
def test_duration_in_seconds():
"""
Test that the duration in seconds is calculated correctly for different duration strings
"""
import time
now = time.time()
current_time = datetime.fromtimestamp(now)
print("current_time={}".format(current_time))
# Calculate the first day of the next month
if current_time.month == 12:
next_month = datetime(year=current_time.year + 1, month=1, day=1)
else:
next_month = datetime(
year=current_time.year, month=current_time.month + 1, day=1
)
print("next_month={}".format(next_month))
# Calculate the duration until the first day of the next month
duration_until_next_month = next_month - current_time
expected_duration = int(duration_until_next_month.total_seconds())
value = _duration_in_seconds(duration="1mo")
assert value - expected_duration < 2

View file

@ -323,3 +323,9 @@ class ChatCompletionResponseMessage(TypedDict, total=False):
content: Optional[str] content: Optional[str]
tool_calls: List[ChatCompletionToolCallChunk] tool_calls: List[ChatCompletionToolCallChunk]
role: Literal["assistant"] role: Literal["assistant"]
class ChatCompletionUsageBlock(TypedDict):
prompt_tokens: int
completion_tokens: int
total_tokens: int

View file

@ -9,6 +9,7 @@ from typing_extensions import (
runtime_checkable, runtime_checkable,
Required, Required,
) )
from enum import Enum
class Field(TypedDict): class Field(TypedDict):
@ -48,6 +49,190 @@ class PartType(TypedDict, total=False):
function_response: FunctionResponse function_response: FunctionResponse
class HttpxFunctionCall(TypedDict):
name: str
args: dict
class HttpxPartType(TypedDict, total=False):
text: str
inline_data: BlobType
file_data: FileDataType
functionCall: HttpxFunctionCall
function_response: FunctionResponse
class HttpxContentType(TypedDict, total=False):
role: Literal["user", "model"]
parts: Required[List[HttpxPartType]]
class ContentType(TypedDict, total=False): class ContentType(TypedDict, total=False):
role: Literal["user", "model"] role: Literal["user", "model"]
parts: Required[List[PartType]] parts: Required[List[PartType]]
class SystemInstructions(TypedDict):
parts: Required[List[PartType]]
class Schema(TypedDict, total=False):
type: Literal["STRING", "INTEGER", "BOOLEAN", "NUMBER", "ARRAY", "OBJECT"]
description: str
enum: List[str]
items: List["Schema"]
properties: "Schema"
required: List[str]
nullable: bool
class FunctionDeclaration(TypedDict, total=False):
name: Required[str]
description: str
parameters: Schema
response: Schema
class FunctionCallingConfig(TypedDict, total=False):
mode: Literal["ANY", "AUTO", "NONE"]
allowed_function_names: List[str]
HarmCategory = Literal[
"HARM_CATEGORY_UNSPECIFIED",
"HARM_CATEGORY_HATE_SPEECH",
"HARM_CATEGORY_DANGEROUS_CONTENT",
"HARM_CATEGORY_HARASSMENT",
"HARM_CATEGORY_SEXUALLY_EXPLICIT",
]
HarmBlockThreshold = Literal[
"HARM_BLOCK_THRESHOLD_UNSPECIFIED",
"BLOCK_LOW_AND_ABOVE",
"BLOCK_MEDIUM_AND_ABOVE",
"BLOCK_ONLY_HIGH",
"BLOCK_NONE",
]
HarmBlockMethod = Literal["HARM_BLOCK_METHOD_UNSPECIFIED", "SEVERITY", "PROBABILITY"]
HarmProbability = Literal[
"HARM_PROBABILITY_UNSPECIFIED", "NEGLIGIBLE", "LOW", "MEDIUM", "HIGH"
]
HarmSeverity = Literal[
"HARM_SEVERITY_UNSPECIFIED",
"HARM_SEVERITY_NEGLIGIBLE",
"HARM_SEVERITY_LOW",
"HARM_SEVERITY_MEDIUM",
"HARM_SEVERITY_HIGH",
]
class SafetSettingsConfig(TypedDict, total=False):
category: HarmCategory
threshold: HarmBlockThreshold
max_influential_terms: int
method: HarmBlockMethod
class GenerationConfig(TypedDict, total=False):
temperature: float
top_p: float
top_k: float
candidate_count: int
max_output_tokens: int
stop_sequences: List[str]
presence_penalty: float
frequency_penalty: float
response_mime_type: Literal["text/plain", "application/json"]
class Tools(TypedDict):
function_declarations: List[FunctionDeclaration]
class ToolConfig(TypedDict):
functionCallingConfig: FunctionCallingConfig
class RequestBody(TypedDict, total=False):
contents: Required[List[ContentType]]
system_instruction: SystemInstructions
tools: Tools
toolConfig: ToolConfig
safetySettings: SafetSettingsConfig
generationConfig: GenerationConfig
class SafetyRatings(TypedDict):
category: HarmCategory
probability: HarmProbability
probabilityScore: int
severity: HarmSeverity
blocked: bool
class Date(TypedDict):
year: int
month: int
date: int
class Citation(TypedDict):
startIndex: int
endIndex: int
uri: str
title: str
license: str
publicationDate: Date
class CitationMetadata(TypedDict):
citations: List[Citation]
class SearchEntryPoint(TypedDict, total=False):
renderedContent: str
sdkBlob: str
class GroundingMetadata(TypedDict, total=False):
webSearchQueries: List[str]
searchEntryPoint: SearchEntryPoint
class Candidates(TypedDict, total=False):
index: int
content: HttpxContentType
finishReason: Literal[
"FINISH_REASON_UNSPECIFIED",
"STOP",
"MAX_TOKENS",
"SAFETY",
"RECITATION",
"OTHER",
"BLOCKLIST",
"PROHIBITED_CONTENT",
"SPII",
]
safetyRatings: SafetyRatings
citationMetadata: CitationMetadata
groundingMetadata: GroundingMetadata
finishMessage: str
class PromptFeedback(TypedDict):
blockReason: str
safetyRatings: List[SafetyRatings]
blockReasonMessage: str
class UsageMetadata(TypedDict):
promptTokenCount: int
totalTokenCount: int
candidatesTokenCount: int
class GenerateContentResponseBody(TypedDict, total=False):
candidates: Required[List[Candidates]]
promptFeedback: PromptFeedback
usageMetadata: Required[UsageMetadata]
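A sketch of how these request/response types compose into a `generateContent` request body. The import path is assumed, and `PartType` is assumed to accept a plain `text` part (its full definition is outside this hunk); since TypedDicts are structural, a plain dict satisfies the annotation at runtime:

```python
from litellm.types.llms.vertex_ai import RequestBody  # assumed import path

body: RequestBody = {
    "contents": [
        {"role": "user", "parts": [{"text": "List 5 popular cookie recipes."}]},
    ],
    "generationConfig": {
        "temperature": 0.2,
        "max_output_tokens": 256,
        "response_mime_type": "application/json",
    },
    "toolConfig": {"functionCallingConfig": {"mode": "AUTO"}},
}
```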

View file

@ -1,6 +1,8 @@
from typing import List, Optional, Union, Dict, Tuple, Literal from typing import List, Optional, Union, Dict, Tuple, Literal
from typing_extensions import TypedDict from typing_extensions import TypedDict
from enum import Enum from enum import Enum
from typing_extensions import override, Required, Dict
from .llms.openai import ChatCompletionUsageBlock, ChatCompletionToolCallChunk
class LiteLLMCommonStrings(Enum): class LiteLLMCommonStrings(Enum):
@ -37,3 +39,12 @@ class ModelInfo(TypedDict):
"completion", "embedding", "image_generation", "chat", "audio_transcription" "completion", "embedding", "image_generation", "chat", "audio_transcription"
] ]
supported_openai_params: Optional[List[str]] supported_openai_params: Optional[List[str]]
class GenericStreamingChunk(TypedDict):
text: Required[str]
tool_use: Optional[ChatCompletionToolCallChunk]
is_finished: Required[bool]
finish_reason: Required[str]
usage: Optional[ChatCompletionUsageBlock]
index: int
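For illustration, a hedged sketch of how a provider handler might populate this chunk type; only the field names come from the definition above, the values are made up:

```python
from litellm.types.utils import GenericStreamingChunk

partial: GenericStreamingChunk = {
    "text": "Hello",
    "tool_use": None,
    "is_finished": False,
    "finish_reason": "",
    "usage": None,
    "index": 0,
}

final: GenericStreamingChunk = {
    "text": "",
    "tool_use": None,
    "is_finished": True,
    "finish_reason": "stop",
    "usage": {"prompt_tokens": 5, "completion_tokens": 2, "total_tokens": 7},
    "index": 0,
}
```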

View file

@ -35,6 +35,9 @@ import litellm._service_logger # for storing API inputs, outputs, and metadata
from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
from litellm.caching import DualCache from litellm.caching import DualCache
from litellm.types.utils import CostPerToken, ProviderField, ModelInfo from litellm.types.utils import CostPerToken, ProviderField, ModelInfo
from litellm.litellm_core_utils.redact_messages import (
redact_message_input_output_from_logging,
)
oidc_cache = DualCache() oidc_cache = DualCache()
@ -518,15 +521,18 @@ class Choices(OpenAIObject):
self, self,
finish_reason=None, finish_reason=None,
index=0, index=0,
message=None, message: Optional[Union[Message, dict]] = None,
logprobs=None, logprobs=None,
enhancements=None, enhancements=None,
**params, **params,
): ):
super(Choices, self).__init__(**params) super(Choices, self).__init__(**params)
self.finish_reason = ( if finish_reason is not None:
map_finish_reason(finish_reason) or "stop" self.finish_reason = map_finish_reason(
finish_reason
) # set finish_reason for all responses ) # set finish_reason for all responses
else:
self.finish_reason = "stop"
self.index = index self.index = index
if message is None: if message is None:
self.message = Message() self.message = Message()
@ -1134,13 +1140,15 @@ class TranscriptionResponse(OpenAIObject):
def print_verbose( def print_verbose(
print_statement, print_statement,
logger_only: bool = False, logger_only: bool = False,
log_level: Literal["DEBUG", "INFO"] = "DEBUG", log_level: Literal["DEBUG", "INFO", "ERROR"] = "DEBUG",
): ):
try: try:
if log_level == "DEBUG": if log_level == "DEBUG":
verbose_logger.debug(print_statement) verbose_logger.debug(print_statement)
elif log_level == "INFO": elif log_level == "INFO":
verbose_logger.info(print_statement) verbose_logger.info(print_statement)
elif log_level == "ERROR":
verbose_logger.error(print_statement)
if litellm.set_verbose == True and logger_only == False: if litellm.set_verbose == True and logger_only == False:
print(print_statement) # noqa print(print_statement) # noqa
except: except:
@ -1473,7 +1481,9 @@ class Logging:
print_verbose( print_verbose(
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
) )
self.redact_message_input_output_from_logging(result=original_response) original_response = redact_message_input_output_from_logging(
litellm_logging_obj=self, result=original_response
)
# Input Integration Logging -> If you want to log the fact that an attempt to call the model was made # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
callbacks = litellm.input_callback + self.dynamic_input_callbacks callbacks = litellm.input_callback + self.dynamic_input_callbacks
@ -1624,6 +1634,12 @@ class Logging:
end_time=end_time, end_time=end_time,
) )
except Exception as e: except Exception as e:
print_verbose(
"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}\n{}".format(
str(e), traceback.format_exc()
),
log_level="ERROR",
)
complete_streaming_response = None complete_streaming_response = None
else: else:
self.sync_streaming_chunks.append(result) self.sync_streaming_chunks.append(result)
@ -1664,7 +1680,9 @@ class Logging:
else: else:
callbacks = litellm.success_callback callbacks = litellm.success_callback
self.redact_message_input_output_from_logging(result=result) result = redact_message_input_output_from_logging(
result=result, litellm_logging_obj=self
)
for callback in callbacks: for callback in callbacks:
try: try:
@ -2214,7 +2232,10 @@ class Logging:
capture_exception(e) capture_exception(e)
except: except:
print_verbose( print_verbose(
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}" "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {}\n{}".format(
str(e), traceback.format_exc()
),
log_level="ERROR",
) )
pass pass
@ -2224,7 +2245,7 @@ class Logging:
""" """
Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions.
""" """
print_verbose(f"Logging Details LiteLLM-Async Success Call") print_verbose("Logging Details LiteLLM-Async Success Call")
start_time, end_time, result = self._success_handler_helper_fn( start_time, end_time, result = self._success_handler_helper_fn(
start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit
) )
@ -2243,7 +2264,10 @@ class Logging:
) )
except Exception as e: except Exception as e:
print_verbose( print_verbose(
f"Error occurred building stream chunk: {traceback.format_exc()}" "Error occurred building stream chunk in success logging: {}\n{}".format(
str(e), traceback.format_exc()
),
log_level="ERROR",
) )
complete_streaming_response = None complete_streaming_response = None
else: else:
@ -2254,7 +2278,7 @@ class Logging:
complete_streaming_response complete_streaming_response
) )
try: try:
if self.model_call_details.get("cache_hit", False) == True: if self.model_call_details.get("cache_hit", False) is True:
self.model_call_details["response_cost"] = 0.0 self.model_call_details["response_cost"] = 0.0
else: else:
# check if base_model set on azure # check if base_model set on azure
@ -2270,8 +2294,8 @@ class Logging:
f"Model={self.model}; cost={self.model_call_details['response_cost']}" f"Model={self.model}; cost={self.model_call_details['response_cost']}"
) )
except litellm.NotFoundError as e: except litellm.NotFoundError as e:
verbose_logger.debug( verbose_logger.error(
f"Model={self.model} not found in completion cost map." f"Model={self.model} not found in completion cost map. Setting 'response_cost' to None"
) )
self.model_call_details["response_cost"] = None self.model_call_details["response_cost"] = None
@ -2291,7 +2315,9 @@ class Logging:
else: else:
callbacks = litellm._async_success_callback callbacks = litellm._async_success_callback
self.redact_message_input_output_from_logging(result=result) result = redact_message_input_output_from_logging(
result=result, litellm_logging_obj=self
)
for callback in callbacks: for callback in callbacks:
# check if callback can run for this request # check if callback can run for this request
@ -2501,7 +2527,9 @@ class Logging:
result = None  # result sent to all loggers, init this to None in case it's not created result = None  # result sent to all loggers, init this to None in case it's not created
self.redact_message_input_output_from_logging(result=result) result = redact_message_input_output_from_logging(
result=result, litellm_logging_obj=self
)
for callback in callbacks: for callback in callbacks:
try: try:
if callback == "lite_debugger": if callback == "lite_debugger":
@ -2725,41 +2753,6 @@ class Logging:
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}" f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
) )
def redact_message_input_output_from_logging(self, result):
"""
Removes messages, prompts, input, response from logging. This modifies the data in-place
only redacts when litellm.turn_off_message_logging == True
"""
# check if user opted out of logging message/response to callbacks
if litellm.turn_off_message_logging is True:
# remove messages, prompts, input, response from logging
self.model_call_details["messages"] = [
{"role": "user", "content": "redacted-by-litellm"}
]
self.model_call_details["prompt"] = ""
self.model_call_details["input"] = ""
# response cleaning
# ChatCompletion Responses
if self.stream and "complete_streaming_response" in self.model_call_details:
_streaming_response = self.model_call_details[
"complete_streaming_response"
]
for choice in _streaming_response.choices:
if isinstance(choice, litellm.Choices):
choice.message.content = "redacted-by-litellm"
elif isinstance(choice, litellm.utils.StreamingChoices):
choice.delta.content = "redacted-by-litellm"
else:
if result is not None:
if isinstance(result, litellm.ModelResponse):
if hasattr(result, "choices") and result.choices is not None:
for choice in result.choices:
if isinstance(choice, litellm.Choices):
choice.message.content = "redacted-by-litellm"
elif isinstance(choice, litellm.utils.StreamingChoices):
choice.delta.content = "redacted-by-litellm"
def exception_logging( def exception_logging(
additional_args={}, additional_args={},
@ -2822,7 +2815,9 @@ class Rules:
raise litellm.APIResponseValidationError(message="LLM Response failed post-call-rule check", llm_provider="", model=model) # type: ignore raise litellm.APIResponseValidationError(message="LLM Response failed post-call-rule check", llm_provider="", model=model) # type: ignore
return True return True
def post_call_rules(self, input: str, model: str): def post_call_rules(self, input: Optional[str], model: str) -> bool:
if input is None:
return True
for rule in litellm.post_call_rules: for rule in litellm.post_call_rules:
if callable(rule): if callable(rule):
decision = rule(input) decision = rule(input)
@ -3101,9 +3096,9 @@ def client(original_function):
pass pass
else: else:
if isinstance(original_response, ModelResponse): if isinstance(original_response, ModelResponse):
model_response = original_response["choices"][0]["message"][ model_response = original_response.choices[
"content" 0
] ].message.content
### POST-CALL RULES ### ### POST-CALL RULES ###
rules_obj.post_call_rules(input=model_response, model=model) rules_obj.post_call_rules(input=model_response, model=model)
except Exception as e: except Exception as e:
@ -3563,7 +3558,7 @@ def client(original_function):
if cached_result is not None and not isinstance( if cached_result is not None and not isinstance(
cached_result, list cached_result, list
): ):
print_verbose(f"Cache Hit!") print_verbose("Cache Hit!", log_level="INFO")
cache_hit = True cache_hit = True
end_time = datetime.datetime.now() end_time = datetime.datetime.now()
( (
@ -4898,6 +4893,18 @@ def get_optional_params_embeddings(
) )
final_params = {**optional_params, **kwargs} final_params = {**optional_params, **kwargs}
return final_params return final_params
if custom_llm_provider == "vertex_ai":
supported_params = get_supported_openai_params(
model=model,
custom_llm_provider="vertex_ai",
request_type="embeddings",
)
_check_valid_arg(supported_params=supported_params)
optional_params = litellm.VertexAITextEmbeddingConfig().map_openai_params(
non_default_params=non_default_params, optional_params={}
)
final_params = {**optional_params, **kwargs}
return final_params
if custom_llm_provider == "vertex_ai": if custom_llm_provider == "vertex_ai":
if len(non_default_params.keys()) > 0: if len(non_default_params.keys()) > 0:
if litellm.drop_params is True: # drop the unsupported non-default values if litellm.drop_params is True: # drop the unsupported non-default values
@ -4931,7 +4938,18 @@ def get_optional_params_embeddings(
message=f"Setting user/encoding format is not supported by {custom_llm_provider}. To drop it from the call, set `litellm.drop_params = True`.", message=f"Setting user/encoding format is not supported by {custom_llm_provider}. To drop it from the call, set `litellm.drop_params = True`.",
) )
return {**non_default_params, **kwargs} return {**non_default_params, **kwargs}
if custom_llm_provider == "mistral":
supported_params = get_supported_openai_params(
model=model,
custom_llm_provider="mistral",
request_type="embeddings",
)
_check_valid_arg(supported_params=supported_params)
optional_params = litellm.MistralEmbeddingConfig().map_openai_params(
non_default_params=non_default_params, optional_params={}
)
final_params = {**optional_params, **kwargs}
return final_params
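With the embedding-param mapping above in place, a Mistral embedding call from the SDK looks like the following sketch (requires a real `MISTRAL_API_KEY`; the key below is a placeholder):

```python
import os
import litellm

os.environ["MISTRAL_API_KEY"] = "sk-..."  # placeholder

response = litellm.embedding(
    model="mistral/mistral-embed",
    input=["hello world"],
)
print(len(response.data[0]["embedding"]))  # embedding vector length
print(response.usage.prompt_tokens)
```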
if ( if (
custom_llm_provider != "openai" custom_llm_provider != "openai"
and custom_llm_provider != "azure" and custom_llm_provider != "azure"
@ -5381,6 +5399,16 @@ def get_optional_params(
print_verbose( print_verbose(
f"(end) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK - optional_params: {optional_params}" f"(end) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK - optional_params: {optional_params}"
) )
elif custom_llm_provider == "vertex_ai_beta":
supported_params = get_supported_openai_params(
model=model, custom_llm_provider=custom_llm_provider
)
_check_valid_arg(supported_params=supported_params)
optional_params = litellm.VertexGeminiConfig().map_openai_params(
non_default_params=non_default_params,
optional_params=optional_params,
model=model,
)
elif ( elif (
custom_llm_provider == "vertex_ai" and model in litellm.vertex_anthropic_models custom_llm_provider == "vertex_ai" and model in litellm.vertex_anthropic_models
): ):
@ -6340,7 +6368,10 @@ def get_supported_openai_params(
"max_retries", "max_retries",
] ]
elif custom_llm_provider == "mistral": elif custom_llm_provider == "mistral":
if request_type == "chat_completion":
return litellm.MistralConfig().get_supported_openai_params() return litellm.MistralConfig().get_supported_openai_params()
elif request_type == "embeddings":
return litellm.MistralEmbeddingConfig().get_supported_openai_params()
elif custom_llm_provider == "replicate": elif custom_llm_provider == "replicate":
return [ return [
"stream", "stream",
@ -6382,7 +6413,10 @@ def get_supported_openai_params(
elif custom_llm_provider == "palm" or custom_llm_provider == "gemini": elif custom_llm_provider == "palm" or custom_llm_provider == "gemini":
return ["temperature", "top_p", "stream", "n", "stop", "max_tokens"] return ["temperature", "top_p", "stream", "n", "stop", "max_tokens"]
elif custom_llm_provider == "vertex_ai": elif custom_llm_provider == "vertex_ai":
if request_type == "chat_completion":
return litellm.VertexAIConfig().get_supported_openai_params() return litellm.VertexAIConfig().get_supported_openai_params()
elif request_type == "embeddings":
return litellm.VertexAITextEmbeddingConfig().get_supported_openai_params()
elif custom_llm_provider == "sagemaker": elif custom_llm_provider == "sagemaker":
return ["stream", "temperature", "max_tokens", "top_p", "stop", "n"] return ["stream", "temperature", "max_tokens", "top_p", "stop", "n"]
elif custom_llm_provider == "aleph_alpha": elif custom_llm_provider == "aleph_alpha":
@ -6919,13 +6953,14 @@ def get_max_tokens(model: str):
) )
def get_model_info(model: str) -> ModelInfo: def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
""" """
Get a dict for the maximum tokens (context window), Get a dict for the maximum tokens (context window),
input_cost_per_token, output_cost_per_token for a given model. input_cost_per_token, output_cost_per_token for a given model.
Parameters: Parameters:
model (str): The name of the model. - model (str): The name of the model.
- custom_llm_provider (str | null): the provider used for the model. If provided, used to check if the litellm model info is for that provider.
Returns: Returns:
dict: A dictionary containing the following information: dict: A dictionary containing the following information:
@ -6979,12 +7014,14 @@ def get_model_info(model: str) -> ModelInfo:
if model in azure_llms: if model in azure_llms:
model = azure_llms[model] model = azure_llms[model]
########################## ##########################
if custom_llm_provider is None:
# Get custom_llm_provider # Get custom_llm_provider
split_model, custom_llm_provider = model, ""
try: try:
split_model, custom_llm_provider, _, _ = get_llm_provider(model=model) split_model, custom_llm_provider, _, _ = get_llm_provider(model=model)
except: except:
pass pass
else:
split_model = model
######################### #########################
supported_openai_params = litellm.get_supported_openai_params( supported_openai_params = litellm.get_supported_openai_params(
@ -7009,10 +7046,20 @@ def get_model_info(model: str) -> ModelInfo:
if model in litellm.model_cost: if model in litellm.model_cost:
_model_info = litellm.model_cost[model] _model_info = litellm.model_cost[model]
_model_info["supported_openai_params"] = supported_openai_params _model_info["supported_openai_params"] = supported_openai_params
if (
"litellm_provider" in _model_info
and _model_info["litellm_provider"] != custom_llm_provider
):
raise Exception
return _model_info return _model_info
if split_model in litellm.model_cost: if split_model in litellm.model_cost:
_model_info = litellm.model_cost[split_model] _model_info = litellm.model_cost[split_model]
_model_info["supported_openai_params"] = supported_openai_params _model_info["supported_openai_params"] = supported_openai_params
if (
"litellm_provider" in _model_info
and _model_info["litellm_provider"] != custom_llm_provider
):
raise Exception
return _model_info return _model_info
else: else:
raise ValueError( raise ValueError(
@ -7192,6 +7239,9 @@ def get_provider_fields(custom_llm_provider: str) -> List[ProviderField]:
elif custom_llm_provider == "ollama": elif custom_llm_provider == "ollama":
return litellm.OllamaConfig().get_required_params() return litellm.OllamaConfig().get_required_params()
elif custom_llm_provider == "azure_ai":
return litellm.AzureAIStudioConfig().get_required_params()
else: else:
return [] return []
@ -10066,6 +10116,14 @@ def get_secret(
return oidc_token return oidc_token
else: else:
raise ValueError("Github OIDC provider failed") raise ValueError("Github OIDC provider failed")
elif oidc_provider == "azure":
# https://azure.github.io/azure-workload-identity/docs/quick-start.html
azure_federated_token_file = os.getenv("AZURE_FEDERATED_TOKEN_FILE")
if azure_federated_token_file is None:
raise ValueError("AZURE_FEDERATED_TOKEN_FILE not found in environment")
with open(azure_federated_token_file, "r") as f:
oidc_token = f.read()
return oidc_token
else: else:
raise ValueError("Unsupported OIDC provider") raise ValueError("Unsupported OIDC provider")
@ -11218,6 +11276,34 @@ class CustomStreamWrapper:
) )
else: else:
completion_obj["content"] = str(chunk) completion_obj["content"] = str(chunk)
elif self.custom_llm_provider and (
self.custom_llm_provider == "vertex_ai_beta"
):
from litellm.types.utils import (
GenericStreamingChunk as UtilsStreamingChunk,
)
if self.received_finish_reason is not None:
raise StopIteration
response_obj: UtilsStreamingChunk = chunk
completion_obj["content"] = response_obj["text"]
if response_obj["is_finished"]:
self.received_finish_reason = response_obj["finish_reason"]
if (
self.stream_options
and self.stream_options.get("include_usage", False) is True
and response_obj["usage"] is not None
):
self.sent_stream_usage = True
model_response.usage = litellm.Usage(
prompt_tokens=response_obj["usage"]["prompt_tokens"],
completion_tokens=response_obj["usage"]["completion_tokens"],
total_tokens=response_obj["usage"]["total_tokens"],
)
if "tool_use" in response_obj and response_obj["tool_use"] is not None:
completion_obj["tool_calls"] = [response_obj["tool_use"]]
elif self.custom_llm_provider and (self.custom_llm_provider == "vertex_ai"): elif self.custom_llm_provider and (self.custom_llm_provider == "vertex_ai"):
import proto # type: ignore import proto # type: ignore
@ -11895,6 +11981,7 @@ class CustomStreamWrapper:
or self.custom_llm_provider == "ollama" or self.custom_llm_provider == "ollama"
or self.custom_llm_provider == "ollama_chat" or self.custom_llm_provider == "ollama_chat"
or self.custom_llm_provider == "vertex_ai" or self.custom_llm_provider == "vertex_ai"
or self.custom_llm_provider == "vertex_ai_beta"
or self.custom_llm_provider == "sagemaker" or self.custom_llm_provider == "sagemaker"
or self.custom_llm_provider == "gemini" or self.custom_llm_provider == "gemini"
or self.custom_llm_provider == "replicate" or self.custom_llm_provider == "replicate"

10
log.txt Normal file
View file

@ -0,0 +1,10 @@
============================= test session starts ==============================
platform darwin -- Python 3.11.4, pytest-8.2.0, pluggy-1.5.0 -- /Users/krrishdholakia/Documents/litellm/litellm/proxy/myenv/bin/python3.11
cachedir: .pytest_cache
rootdir: /Users/krrishdholakia/Documents/litellm
configfile: pyproject.toml
plugins: logfire-0.35.0, asyncio-0.23.6, mock-3.14.0, anyio-4.2.0
asyncio: mode=Mode.STRICT
collecting ... collected 0 items
============================ no tests ran in 0.00s =============================

View file

@ -3347,6 +3347,24 @@
"litellm_provider": "deepinfra", "litellm_provider": "deepinfra",
"mode": "chat" "mode": "chat"
}, },
"deepinfra/meta-llama/Meta-Llama-3-8B-Instruct": {
"max_tokens": 8191,
"max_input_tokens": 8191,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000008,
"output_cost_per_token": 0.00000008,
"litellm_provider": "deepinfra",
"mode": "chat"
},
"deepinfra/meta-llama/Meta-Llama-3-70B-Instruct": {
"max_tokens": 8191,
"max_input_tokens": 8191,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000059,
"output_cost_per_token": 0.00000079,
"litellm_provider": "deepinfra",
"mode": "chat"
},
"deepinfra/01-ai/Yi-34B-200K": { "deepinfra/01-ai/Yi-34B-200K": {
"max_tokens": 4096, "max_tokens": 4096,
"max_input_tokens": 200000, "max_input_tokens": 200000,

View file

@ -85,6 +85,9 @@ model_list:
litellm_params: litellm_params:
model: openai/* model: openai/*
api_key: os.environ/OPENAI_API_KEY api_key: os.environ/OPENAI_API_KEY
- model_name: mistral-embed
litellm_params:
model: mistral/mistral-embed
- model_name: gpt-instruct # [PROD TEST] - tests if `/health` automatically infers this to be a text completion model - model_name: gpt-instruct # [PROD TEST] - tests if `/health` automatically infers this to be a text completion model
litellm_params: litellm_params:
model: text-completion-openai/gpt-3.5-turbo-instruct model: text-completion-openai/gpt-3.5-turbo-instruct
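With the `mistral-embed` deployment added to the proxy config, clients reach it through the standard `/embeddings` route. A sketch using the OpenAI SDK pointed at the proxy (URL and key are placeholders):

```python
# pip install openai
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.embeddings.create(
    model="mistral-embed",
    input=["hello world"],
)
print(len(response.data[0].embedding))
```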

View file

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "litellm" name = "litellm"
version = "1.40.9" version = "1.40.12"
description = "Library to easily interface with LLM API providers" description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"] authors = ["BerriAI"]
license = "MIT" license = "MIT"
@ -85,7 +85,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"
[tool.commitizen] [tool.commitizen]
version = "1.40.9" version = "1.40.12"
version_files = [ version_files = [
"pyproject.toml:^version" "pyproject.toml:^version"
] ]

View file

@ -22,6 +22,7 @@ async def generate_key(
"text-embedding-ada-002", "text-embedding-ada-002",
"dall-e-2", "dall-e-2",
"fake-openai-endpoint-2", "fake-openai-endpoint-2",
"mistral-embed",
], ],
): ):
url = "http://0.0.0.0:4000/key/generate" url = "http://0.0.0.0:4000/key/generate"
@ -197,14 +198,14 @@ async def completion(session, key):
return response return response
async def embeddings(session, key): async def embeddings(session, key, model="text-embedding-ada-002"):
url = "http://0.0.0.0:4000/embeddings" url = "http://0.0.0.0:4000/embeddings"
headers = { headers = {
"Authorization": f"Bearer {key}", "Authorization": f"Bearer {key}",
"Content-Type": "application/json", "Content-Type": "application/json",
} }
data = { data = {
"model": "text-embedding-ada-002", "model": model,
"input": ["hello world"], "input": ["hello world"],
} }
@ -408,6 +409,9 @@ async def test_embeddings():
key_2 = key_gen["key"] key_2 = key_gen["key"]
await embeddings(session=session, key=key_2) await embeddings(session=session, key=key_2)
# embedding request with non OpenAI model
await embeddings(session=session, key=key, model="mistral-embed")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_image_generation(): async def test_image_generation():

File diff suppressed because one or more lines are too long

View file

@ -1 +0,0 @@
self.__BUILD_MANIFEST={__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/_error":["static/chunks/pages/_error-d6107f1aac0c574c.js"],sortedPages:["/_app","/_error"]},self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();

View file

@ -1 +0,0 @@
self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()

Some files were not shown because too many files have changed in this diff