Merge branch 'main' into litellm_redis_cache_usage
Commit c373f104cc
131 changed files with 3117 additions and 476 deletions
|
@ -202,6 +202,7 @@ jobs:
|
|||
-e REDIS_PORT=$REDIS_PORT \
|
||||
-e AZURE_FRANCE_API_KEY=$AZURE_FRANCE_API_KEY \
|
||||
-e AZURE_EUROPE_API_KEY=$AZURE_EUROPE_API_KEY \
|
||||
-e MISTRAL_API_KEY=$MISTRAL_API_KEY \
|
||||
-e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
|
||||
-e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
|
||||
-e AWS_REGION_NAME=$AWS_REGION_NAME \
|
||||
|
|
|
@ -4,6 +4,7 @@ import Image from '@theme/IdealImage';
|
|||
|
||||
See the raw request/response sent by LiteLLM in your logging provider (OTEL/Langfuse/etc.).
|
||||
|
||||
**on SDK**
|
||||
```python
|
||||
# pip install langfuse
|
||||
import litellm
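# a sketch: enable raw request/response logging on the SDK; this mirrors the
# proxy's `log_raw_request_response: True` setting shown below
litellm.log_raw_request_response = True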
|
||||
|
@ -33,6 +34,13 @@ response = litellm.completion(
|
|||
)
|
||||
```
|
||||
|
||||
**on Proxy**
|
||||
|
||||
```yaml
|
||||
litellm_settings:
|
||||
log_raw_request_response: True
|
||||
```
|
||||
|
||||
**Expected Log**
|
||||
|
||||
<Image img={require('../../img/raw_request_log.png')}/>
|
|
@ -68,6 +68,7 @@ response = litellm.completion(
|
|||
|
||||
| Model Name | Function Call |
|
||||
|------------------|----------------------------------------|
|
||||
| gpt-4o | `completion('azure/<your deployment name>', messages)` |
|
||||
| gpt-4 | `completion('azure/<your deployment name>', messages)` |
|
||||
| gpt-4-0314 | `completion('azure/<your deployment name>', messages)` |
|
||||
| gpt-4-0613 | `completion('azure/<your deployment name>', messages)` |
|
||||
|
@ -85,7 +86,8 @@ response = litellm.completion(
|
|||
## Azure OpenAI Vision Models
|
||||
| Model Name | Function Call |
|
||||
|-----------------------|-----------------------------------------------------------------|
|
||||
| gpt-4-vision | `response = completion(model="azure/<your deployment name>", messages=messages)` |
|
||||
| gpt-4-vision | `completion(model="azure/<your deployment name>", messages=messages)` |
|
||||
| gpt-4o | `completion('azure/<your deployment name>', messages)` |
|
||||
|
||||
#### Usage
|
||||
```python
|
||||
|
|
|
@ -144,16 +144,135 @@ print(response)
|
|||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
## Set temperature, top p, etc.
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="sdk" label="SDK">
|
||||
|
||||
```python
|
||||
import os
|
||||
from litellm import completion
|
||||
|
||||
os.environ["AWS_ACCESS_KEY_ID"] = ""
|
||||
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
|
||||
os.environ["AWS_REGION_NAME"] = ""
|
||||
|
||||
response = completion(
|
||||
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
||||
temperature=0.7,
|
||||
top_p=1
|
||||
)
|
||||
```
|
||||
</TabItem>
|
||||
<TabItem value="proxy" label="PROXY">
|
||||
|
||||
**Set on yaml**
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: bedrock-claude-v1
|
||||
litellm_params:
|
||||
model: bedrock/anthropic.claude-instant-v1
|
||||
temperature: <your-temp>
|
||||
top_p: <your-top-p>
|
||||
```
|
||||
|
||||
**Set on request**
|
||||
|
||||
```python
|
||||
|
||||
import openai
|
||||
client = openai.OpenAI(
|
||||
api_key="anything",
|
||||
base_url="http://0.0.0.0:4000"
|
||||
)
|
||||
|
||||
# request sent to model set on litellm proxy, `litellm --model`
|
||||
response = client.chat.completions.create(model="bedrock-claude-v1", messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "this is a test request, write a short poem"
|
||||
}
|
||||
],
|
||||
temperature=0.7,
|
||||
top_p=1
|
||||
)
|
||||
|
||||
print(response)
|
||||
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
## Pass provider-specific params
|
||||
|
||||
If you pass a non-openai param to litellm, we'll assume it's provider-specific and send it as a kwarg in the request body. [See more](../completion/input.md#provider-specific-params)
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="sdk" label="SDK">
|
||||
|
||||
```python
|
||||
import os
|
||||
from litellm import completion
|
||||
|
||||
os.environ["AWS_ACCESS_KEY_ID"] = ""
|
||||
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
|
||||
os.environ["AWS_REGION_NAME"] = ""
|
||||
|
||||
response = completion(
|
||||
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
||||
top_k=1 # 👈 PROVIDER-SPECIFIC PARAM
|
||||
)
|
||||
```
|
||||
</TabItem>
|
||||
<TabItem value="proxy" label="PROXY">
|
||||
|
||||
**Set on yaml**
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: bedrock-claude-v1
|
||||
litellm_params:
|
||||
model: bedrock/anthropic.claude-instant-v1
|
||||
top_k: 1 # 👈 PROVIDER-SPECIFIC PARAM
|
||||
```
|
||||
|
||||
**Set on request**
|
||||
|
||||
```python
|
||||
|
||||
import openai
|
||||
client = openai.OpenAI(
|
||||
api_key="anything",
|
||||
base_url="http://0.0.0.0:4000"
|
||||
)
|
||||
|
||||
# request sent to model set on litellm proxy, `litellm --model`
|
||||
response = client.chat.completions.create(model="bedrock-claude-v1", messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "this is a test request, write a short poem"
|
||||
}
|
||||
],
|
||||
temperature=0.7,
|
||||
extra_body={
|
||||
top_k=1 # 👈 PROVIDER-SPECIFIC PARAM
|
||||
}
|
||||
)
|
||||
|
||||
print(response)
|
||||
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
## Usage - Function Calling
|
||||
|
||||
:::info
|
||||
|
||||
Claude returns its output as an XML tree. [Here is how we translate it](https://github.com/BerriAI/litellm/blob/49642a5b00a53b1babc1a753426a8afcac85dbbe/litellm/llms/prompt_templates/factory.py#L734).
|
||||
|
||||
You can see the raw response via `response._hidden_params["original_response"]`.
|
||||
|
||||
Claude sometimes hallucinates the XML structure, e.g. returning the list param `value` as `<value>\n<item>apple</item>\n<item>banana</item>\n</value>` or `<value>\n<list>\n<item>apple</item>\n<item>banana</item>\n</list>\n</value>`.
|
||||
:::
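For example, a minimal sketch of inspecting the untranslated response (model name reused from the examples above):

```python
from litellm import completion

response = completion(
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
)

# raw, untranslated provider response
print(response._hidden_params["original_response"])
```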
|
||||
LiteLLM uses Bedrock's Converse API for making tool calls.
|
||||
|
||||
```python
|
||||
from litellm import completion
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
# Clarifai
|
||||
Anthropic, OpenAI, Mistral, Llama, and Gemini LLMs are supported on Clarifai.
|
||||
|
||||
:::warning
|
||||
|
||||
Streaming is not yet supported when using Clarifai with LiteLLM. Tracking support here: https://github.com/BerriAI/litellm/issues/4162
|
||||
|
||||
:::
|
||||
|
||||
## Pre-Requisites
|
||||
|
||||
`pip install clarifai`
|
||||
|
||||
`pip install litellm`
|
||||
|
||||
## Required Environment Variables
|
||||
|
@ -12,6 +15,7 @@ To obtain your Clarifai Personal access token follow this [link](https://docs.cl
|
|||
|
||||
```python
|
||||
os.environ["CLARIFAI_API_KEY"] = "YOUR_CLARIFAI_PAT" # CLARIFAI_PAT
|
||||
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
@ -68,7 +72,7 @@ Example Usage - Note: liteLLM supports all models deployed on Clarifai
|
|||
| clarifai/meta.Llama-2.codeLlama-70b-Python | `completion('clarifai/meta.Llama-2.codeLlama-70b-Python', messages)`|
|
||||
| clarifai/meta.Llama-2.codeLlama-70b-Instruct | `completion('clarifai/meta.Llama-2.codeLlama-70b-Instruct', messages)` |
|
||||
|
||||
## Mistal LLMs
|
||||
## Mistral LLMs
|
||||
| Model Name | Function Call |
|
||||
|---------------------------------------------|------------------------------------------------------------------------|
|
||||
| clarifai/mistralai.completion.mixtral-8x22B | `completion('clarifai/mistralai.completion.mixtral-8x22B', messages)` |
|
||||
|
|
|
@ -8,6 +8,152 @@ import TabItem from '@theme/TabItem';
|
|||
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
||||
</a>
|
||||
|
||||
## 🆕 `vertex_ai_beta/` route
|
||||
|
||||
New `vertex_ai_beta/` route. Adds support for system messages, the `tool_choice` param, etc. by moving to an httpx client (instead of the Vertex SDK).
|
||||
|
||||
```python
|
||||
from litellm import completion
|
||||
import json
|
||||
|
||||
## GET CREDENTIALS
|
||||
file_path = 'path/to/vertex_ai_service_account.json'
|
||||
|
||||
# Load the JSON file
|
||||
with open(file_path, 'r') as file:
|
||||
vertex_credentials = json.load(file)
|
||||
|
||||
# Convert to JSON string
|
||||
vertex_credentials_json = json.dumps(vertex_credentials)
|
||||
|
||||
## COMPLETION CALL
|
||||
response = completion(
|
||||
model="vertex_ai_beta/gemini-pro",
|
||||
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
||||
vertex_credentials=vertex_credentials_json
|
||||
)
|
||||
```
|
||||
|
||||
### **System Message**
|
||||
|
||||
```python
|
||||
from litellm import completion
|
||||
import json
|
||||
|
||||
## GET CREDENTIALS
|
||||
file_path = 'path/to/vertex_ai_service_account.json'
|
||||
|
||||
# Load the JSON file
|
||||
with open(file_path, 'r') as file:
|
||||
vertex_credentials = json.load(file)
|
||||
|
||||
# Convert to JSON string
|
||||
vertex_credentials_json = json.dumps(vertex_credentials)
|
||||
|
||||
|
||||
response = completion(
|
||||
model="vertex_ai_beta/gemini-pro",
|
||||
messages=[{"content": "You are a good bot.","role": "system"}, {"content": "Hello, how are you?","role": "user"}],
|
||||
vertex_credentials=vertex_credentials_json
|
||||
)
|
||||
```
|
||||
|
||||
### **Function Calling**
|
||||
|
||||
Force Gemini to make tool calls with `tool_choice="required"`.
|
||||
|
||||
```python
|
||||
from litellm import completion
|
||||
import json
|
||||
|
||||
## GET CREDENTIALS
|
||||
file_path = 'path/to/vertex_ai_service_account.json'
|
||||
|
||||
# Load the JSON file
|
||||
with open(file_path, 'r') as file:
|
||||
vertex_credentials = json.load(file)
|
||||
|
||||
# Convert to JSON string
|
||||
vertex_credentials_json = json.dumps(vertex_credentials)
|
||||
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Your name is Litellm Bot, you are a helpful assistant",
|
||||
},
|
||||
# User asks for their name and weather in San Francisco
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello, what is your name and can you tell me the weather?",
|
||||
},
|
||||
]
|
||||
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
}
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
data = {
|
||||
"model": "vertex_ai_beta/gemini-1.5-pro-preview-0514"),
|
||||
"messages": messages,
|
||||
"tools": tools,
|
||||
"tool_choice": "required",
|
||||
"vertex_credentials": vertex_credentials_json
|
||||
}
|
||||
|
||||
## COMPLETION CALL
|
||||
print(completion(**data))
|
||||
```
|
||||
|
||||
### **JSON Schema**
|
||||
|
||||
```python
|
||||
from litellm import completion
|
||||
|
||||
## GET CREDENTIALS
|
||||
file_path = 'path/to/vertex_ai_service_account.json'
|
||||
|
||||
# Load the JSON file
|
||||
with open(file_path, 'r') as file:
|
||||
vertex_credentials = json.load(file)
|
||||
|
||||
# Convert to JSON string
|
||||
vertex_credentials_json = json.dumps(vertex_credentials)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": """
|
||||
List 5 popular cookie recipes.
|
||||
|
||||
Using this JSON schema:
|
||||
|
||||
Recipe = {"recipe_name": str}
|
||||
|
||||
Return a `list[Recipe]`
|
||||
"""
|
||||
}
|
||||
]
|
||||
|
||||
completion(model="vertex_ai_beta/gemini-1.5-flash-preview-0514", messages=messages, response_format={ "type": "json_object" })
|
||||
```
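Since the model is asked to return JSON, the output can be parsed directly. A minimal sketch (reusing `completion` and `messages` from the block above, and assuming the model returns valid JSON):

```python
import json

response = completion(
    model="vertex_ai_beta/gemini-1.5-flash-preview-0514",
    messages=messages,
    response_format={"type": "json_object"},
)

# parse the structured output into a Python list of recipes
recipes = json.loads(response.choices[0].message.content)
print(recipes)
```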
|
||||
|
||||
## Pre-requisites
|
||||
* `pip install google-cloud-aiplatform` (pre-installed on proxy docker image)
|
||||
* Authentication:
|
||||
|
@ -140,7 +286,7 @@ In certain use-cases you may need to make calls to the models and pass [safety s
|
|||
|
||||
```python
|
||||
response = completion(
|
||||
model="gemini/gemini-pro",
|
||||
model="vertex_ai/gemini-pro",
|
||||
messages=[{"role": "user", "content": "write code for saying hi from LiteLLM"}]
|
||||
safety_settings=[
|
||||
{
|
||||
|
@ -363,8 +509,8 @@ response = completion(
|
|||
## Gemini 1.5 Pro (and Vision)
|
||||
| Model Name | Function Call |
|
||||
|------------------|--------------------------------------|
|
||||
| gemini-1.5-pro | `completion('gemini-1.5-pro', messages)`, `completion('vertex_ai/gemini-pro', messages)` |
|
||||
| gemini-1.5-flash-preview-0514 | `completion('gemini-1.5-flash-preview-0514', messages)`, `completion('vertex_ai/gemini-pro', messages)` |
|
||||
| gemini-1.5-pro | `completion('gemini-1.5-pro', messages)`, `completion('vertex_ai/gemini-1.5-pro', messages)` |
|
||||
| gemini-1.5-flash-preview-0514 | `completion('gemini-1.5-flash-preview-0514', messages)`, `completion('vertex_ai/gemini-1.5-flash-preview-0514', messages)` |
|
||||
| gemini-1.5-pro-preview-0514 | `completion('gemini-1.5-pro-preview-0514', messages)`, `completion('vertex_ai/gemini-1.5-pro-preview-0514', messages)` |
|
||||
|
||||
|
||||
|
@ -558,6 +704,29 @@ All models listed [here](https://github.com/BerriAI/litellm/blob/57f37f743886a02
|
|||
| text-embedding-preview-0409 | `embedding(model="vertex_ai/text-embedding-preview-0409", input)` |
|
||||
| text-multilingual-embedding-preview-0409 | `embedding(model="vertex_ai/text-multilingual-embedding-preview-0409", input)` |
|
||||
|
||||
### Advanced: Use `task_type` and `title` (Vertex-specific params)
|
||||
|
||||
👉 `task_type` and `title` are vertex specific params
|
||||
|
||||
LiteLLM supports the following Vertex-specific params:
|
||||
|
||||
```python
|
||||
auto_truncate: Optional[bool] = None
|
||||
task_type: Optional[Literal["RETRIEVAL_QUERY","RETRIEVAL_DOCUMENT", "SEMANTIC_SIMILARITY", "CLASSIFICATION", "CLUSTERING", "QUESTION_ANSWERING", "FACT_VERIFICATION"]] = None
|
||||
title: Optional[str] = None # The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
|
||||
```
|
||||
|
||||
**Example Usage with LiteLLM**
|
||||
```python
|
||||
response = litellm.embedding(
|
||||
model="vertex_ai/text-embedding-004",
|
||||
input=["good morning from litellm", "gm"]
|
||||
task_type = "RETRIEVAL_DOCUMENT",
|
||||
dimensions=1,
|
||||
auto_truncate=True,
|
||||
)
|
||||
```
|
||||
|
||||
## Image Generation Models
|
||||
|
||||
Usage
|
||||
|
@ -657,6 +826,3 @@ s/o @[Darien Kindlund](https://www.linkedin.com/in/kindlund/) for this tutorial
@ -1,3 +1,5 @@
|
|||
import Image from '@theme/IdealImage';
|
||||
|
||||
# 🚨 Alerting / Webhooks
|
||||
|
||||
Get alerts for:
|
||||
|
@ -15,6 +17,11 @@ Get alerts for:
|
|||
- **Spend** Weekly & Monthly spend per Team, Tag
|
||||
|
||||
|
||||
Works across:
|
||||
- [Slack](#quick-start)
|
||||
- [Discord](#advanced---using-discord-webhooks)
|
||||
- [Microsoft Teams](#advanced---using-ms-teams-webhooks)
|
||||
|
||||
## Quick Start
|
||||
|
||||
Set up a Slack alert channel to receive alerts from the proxy.
|
||||
|
@ -108,6 +115,48 @@ AlertType = Literal[
|
|||
```
|
||||
|
||||
|
||||
## Advanced - Using MS Teams Webhooks
|
||||
|
||||
MS Teams provides a Slack-compatible webhook URL that you can use for alerting.
|
||||
|
||||
##### Quick Start
|
||||
|
||||
1. [Get a webhook url](https://learn.microsoft.com/en-us/microsoftteams/platform/webhooks-and-connectors/how-to/add-incoming-webhook?tabs=newteams%2Cdotnet#create-an-incoming-webhook) for your Microsoft Teams channel
|
||||
|
||||
2. Add it to your .env
|
||||
|
||||
```bash
|
||||
SLACK_WEBHOOK_URL="https://berriai.webhook.office.com/webhookb2/...6901/IncomingWebhook/b55fa0c2a48647be8e6effedcd540266/e04b1092-4a3e-44a2-ab6b-29a0a4854d1d"
|
||||
```
|
||||
|
||||
3. Add it to your litellm config
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
model_name: "azure-model"
|
||||
litellm_params:
|
||||
model: "azure/gpt-35-turbo"
|
||||
api_key: "my-bad-key" # 👈 bad key
|
||||
|
||||
general_settings:
|
||||
alerting: ["slack"]
|
||||
alerting_threshold: 300 # sends alerts if requests hang for 5min+ and responses take 5min+
|
||||
```
|
||||
|
||||
4. Run health check!
|
||||
|
||||
Call the proxy `/health/services` endpoint to test if your alerting connection is correctly set up.
|
||||
|
||||
```bash
|
||||
curl --location 'http://0.0.0.0:4000/health/services?service=slack' \
|
||||
--header 'Authorization: Bearer sk-1234'
|
||||
```
|
||||
|
||||
|
||||
**Expected Response**
|
||||
|
||||
<Image img={require('../../img/ms_teams_alerting.png')}/>
|
||||
|
||||
## Advanced - Using Discord Webhooks
|
||||
|
||||
Discord provides a Slack-compatible webhook URL that you can use for alerting.
|
||||
|
@ -139,7 +188,6 @@ environment_variables:
|
|||
SLACK_WEBHOOK_URL: "https://discord.com/api/webhooks/1240030362193760286/cTLWt5ATn1gKmcy_982rl5xmYHsrM1IWJdmCL1AyOmU9JdQXazrp8L1_PYgUtgxj8x4f/slack"
|
||||
```
|
||||
|
||||
That's it! You're ready to go!
|
||||
|
||||
## Advanced - [BETA] Webhooks for Budget Alerts
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
import Image from '@theme/IdealImage';
|
||||
|
||||
# 🐳 Docker, Deploying LiteLLM Proxy
|
||||
|
||||
|
@ -537,7 +538,9 @@ ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml
|
|||
|
||||
## Advanced Deployment Settings
|
||||
|
||||
### Customization of the server root path
|
||||
### 1. Customization of the server root path (custom Proxy base url)
|
||||
|
||||
💥 Use this when you want to serve LiteLLM on a custom base url path like `https://localhost:4000/api/v1`
|
||||
|
||||
:::info
|
||||
|
||||
|
@ -548,9 +551,29 @@ In a Kubernetes deployment, it's possible to utilize a shared DNS to host multip
|
|||
Customize the root path to eliminate the need for employing multiple DNS configurations during deployment.
|
||||
|
||||
👉 Set `SERVER_ROOT_PATH` in your .env and this will be set as your server root path
|
||||
```
|
||||
export SERVER_ROOT_PATH="/api/v1"
|
||||
```
|
||||
|
||||
**Step 1. Run the proxy with `SERVER_ROOT_PATH` set in your env**
|
||||
|
||||
### Setting SSL Certification
|
||||
```shell
|
||||
docker run --name litellm-proxy \
|
||||
-e DATABASE_URL=postgresql://<user>:<password>@<host>:<port>/<dbname> \
|
||||
-e SERVER_ROOT_PATH="/api/v1" \
|
||||
-p 4000:4000 \
|
||||
ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml
|
||||
```
|
||||
|
||||
After running the proxy you can access it on `http://0.0.0.0:4000/api/v1/` (since we set `SERVER_ROOT_PATH="/api/v1"`)
|
||||
|
||||
**Step 2. Verify it's running on the correct path**
|
||||
|
||||
<Image img={require('../../img/custom_root_path.png')} />
|
||||
|
||||
**That's it**, that's all you need to run the proxy on a custom root path
|
||||
|
||||
### 2. Setting SSL Certification
|
||||
|
||||
Use this if you need to set SSL certificates for your on-prem LiteLLM proxy
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Grafana, Prometheus metrics [BETA]
|
||||
# 📈 Prometheus metrics [BETA]
|
||||
|
||||
LiteLLM exposes a `/metrics` endpoint for Prometheus to poll
|
||||
|
||||
|
@ -54,6 +54,13 @@ http://localhost:4000/metrics
|
|||
| `litellm_total_tokens` | input + output tokens per `"user", "key", "model", "team", "end-user"` |
|
||||
| `litellm_llm_api_failed_requests_metric` | Number of failed LLM API requests per `"user", "key", "model", "team", "end-user"` |
|
||||
|
||||
### Budget Metrics
|
||||
| Metric Name | Description |
|
||||
|----------------------|--------------------------------------|
|
||||
| `litellm_remaining_team_budget_metric` | Remaining Budget for Team (A team created on LiteLLM) |
|
||||
| `litellm_remaining_api_key_budget_metric` | Remaining Budget for API Key (A key Created on LiteLLM)|
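A quick way to check these gauges is to scrape the proxy's `/metrics` endpoint directly. A minimal sketch, assuming the proxy is running locally on port 4000 as above:

```python
import requests

# fetch the Prometheus exposition text from the proxy
metrics = requests.get("http://localhost:4000/metrics").text

# print only the budget gauges listed in the table above
for line in metrics.splitlines():
    if line.startswith(("litellm_remaining_team_budget_metric", "litellm_remaining_api_key_budget_metric")):
        print(line)
```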
|
||||
|
||||
|
||||
## Monitor System Health
|
||||
|
||||
To monitor the health of litellm adjacent services (redis / postgres), do:
|
||||
|
|
|
@ -124,3 +124,17 @@ LiteLLM Enterprise: Enable [SSO login](./ui.md#setup-ssoauth-for-ui)
|
|||
|
||||
|
||||
<Image img={require('../../img/ui_self_serve_create_key.png')} style={{ width: '800px', height: 'auto' }} />
|
||||
|
||||
|
||||
## Advanced
|
||||
### Setting custom logout URLs
|
||||
|
||||
Set `PROXY_LOGOUT_URL` in your .env if you want users to get redirected to a specific URL when they click logout
|
||||
|
||||
```
|
||||
export PROXY_LOGOUT_URL="https://www.google.com"
|
||||
```
|
||||
|
||||
<Image img={require('../../img/ui_logout.png')} style={{ width: '400px', height: 'auto' }} />
|
||||
|
||||
|
||||
|
|
123 docs/my-website/docs/proxy/team_budgets.md (new file)
|
@ -0,0 +1,123 @@
|
|||
import Image from '@theme/IdealImage';
|
||||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
|
||||
# 💰 Setting Team Budgets
|
||||
|
||||
Track spend and set budgets for your internal teams.
|
||||
|
||||
## Setting Monthly Team Budgets
|
||||
|
||||
### 1. Create a team
|
||||
- Set `max_budget=0.000000001` (the $ value the team is allowed to spend)
|
||||
- Set `budget_duration="1d"` (How frequently the budget should update)
|
||||
|
||||
|
||||
Create a new team and set `max_budget` and `budget_duration`
|
||||
```shell
|
||||
curl -X POST 'http://0.0.0.0:4000/team/new' \
|
||||
-H 'Authorization: Bearer sk-1234' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"team_alias": "QA Prod Bot",
|
||||
"max_budget": 0.000000001,
|
||||
"budget_duration": "1d"
|
||||
}'
|
||||
```
|
||||
|
||||
Response
|
||||
```shell
|
||||
{
|
||||
"team_alias": "QA Prod Bot",
|
||||
"team_id": "de35b29e-6ca8-4f47-b804-2b79d07aa99a",
|
||||
"max_budget": 0.0001,
|
||||
"budget_duration": "1d",
|
||||
"budget_reset_at": "2024-06-14T22:48:36.594000Z"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
Possible values for `budget_duration`
|
||||
|
||||
| `budget_duration` | When Budget will reset |
|
||||
| --- | --- |
|
||||
| `budget_duration="1s"` | every 1 second |
|
||||
| `budget_duration="1m"` | every 1 min |
|
||||
| `budget_duration="1h"` | every 1 hour |
|
||||
| `budget_duration="1d"` | every 1 day |
|
||||
| `budget_duration="1mo"` | start of every month |
|
||||
|
||||
|
||||
### 2. Create a key for the `team`
|
||||
|
||||
Create a key for `team_id="de35b29e-6ca8-4f47-b804-2b79d07aa99a"` from Step 1
|
||||
|
||||
💡 **The Budget for Team="QA Prod Bot" budget will apply to this team**
|
||||
|
||||
```shell
|
||||
curl -X POST 'http://0.0.0.0:4000/key/generate' \
|
||||
-H 'Authorization: Bearer sk-1234' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"team_id": "de35b29e-6ca8-4f47-b804-2b79d07aa99a"}'
|
||||
```
|
||||
|
||||
Response
|
||||
|
||||
```shell
|
||||
{"team_id":"de35b29e-6ca8-4f47-b804-2b79d07aa99a", "key":"sk-5qtncoYjzRcxMM4bDRktNQ"}
|
||||
```
|
||||
|
||||
|
||||
### 3. Test It
|
||||
|
||||
Use the key from step 2 and run this request twice
|
||||
```shell
|
||||
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||
-H 'Authorization: Bearer sk-5qtncoYjzRcxMM4bDRktNQ' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d ' {
|
||||
"model": "llama3",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "hi"
|
||||
}
|
||||
]
|
||||
}'
|
||||
```
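The same request via the OpenAI Python client, as used elsewhere in these docs (a sketch; assumes the proxy is reachable at `http://0.0.0.0:4000` and uses the key returned in step 2):

```python
import openai

client = openai.OpenAI(
    api_key="sk-5qtncoYjzRcxMM4bDRktNQ",  # key from step 2
    base_url="http://0.0.0.0:4000"
)

response = client.chat.completions.create(
    model="llama3",
    messages=[{"role": "user", "content": "hi"}],
)
print(response)
```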
|
||||
|
||||
On the second response, expect to see the following exception:
|
||||
|
||||
```shell
|
||||
{
|
||||
"error": {
|
||||
"message": "Budget has been exceeded! Current cost: 3.5e-06, Max budget: 1e-09",
|
||||
"type": "auth_error",
|
||||
"param": null,
|
||||
"code": 400
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Advanced
|
||||
|
||||
### Prometheus metrics for `remaining_budget`
|
||||
|
||||
[More info about Prometheus metrics here](https://docs.litellm.ai/docs/proxy/prometheus)
|
||||
|
||||
You'll need the following in your proxy config.yaml
|
||||
|
||||
```yaml
|
||||
litellm_settings:
|
||||
success_callback: ["prometheus"]
|
||||
failure_callback: ["prometheus"]
|
||||
```
|
||||
|
||||
Expect to see this metric on Prometheus, tracking the remaining budget for the team:
|
||||
|
||||
```shell
|
||||
litellm_remaining_team_budget_metric{team_alias="QA Prod Bot",team_id="de35b29e-6ca8-4f47-b804-2b79d07aa99a"} 9.699999999999992e-06
|
||||
```
|
||||
|
||||
|
BIN docs/my-website/img/custom_root_path.png (new file; binary not shown; 151 KiB)
BIN docs/my-website/img/ms_teams_alerting.png (new file; binary not shown; 241 KiB)
BIN docs/my-website/img/ui_logout.png (new file; binary not shown; 27 KiB)
|
@ -44,6 +44,7 @@ const sidebars = {
|
|||
"proxy/self_serve",
|
||||
"proxy/users",
|
||||
"proxy/customers",
|
||||
"proxy/team_budgets",
|
||||
"proxy/billing",
|
||||
"proxy/user_keys",
|
||||
"proxy/virtual_keys",
|
||||
|
@ -54,6 +55,7 @@ const sidebars = {
|
|||
items: ["proxy/logging", "proxy/streaming_logging"],
|
||||
},
|
||||
"proxy/ui",
|
||||
"proxy/prometheus",
|
||||
"proxy/email",
|
||||
"proxy/multiple_admins",
|
||||
"proxy/team_based_routing",
|
||||
|
@ -70,7 +72,6 @@ const sidebars = {
|
|||
"proxy/pii_masking",
|
||||
"proxy/prompt_injection",
|
||||
"proxy/caching",
|
||||
"proxy/prometheus",
|
||||
"proxy/call_hooks",
|
||||
"proxy/rules",
|
||||
"proxy/cli",
|
||||
|
|
|
@ -93,7 +93,7 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
|
|||
response.choices[0], litellm.utils.Choices
|
||||
):
|
||||
for word in self.banned_keywords_list:
|
||||
self.test_violation(test_str=response.choices[0].message.content)
|
||||
self.test_violation(test_str=response.choices[0].message.content or "")
|
||||
|
||||
async def async_post_call_streaming_hook(
|
||||
self,
|
||||
|
|
|
@ -73,7 +73,7 @@ token: Optional[str] = (
|
|||
)
|
||||
telemetry = True
|
||||
max_tokens = 256 # OpenAI Defaults
|
||||
drop_params = False
|
||||
drop_params = bool(os.getenv("LITELLM_DROP_PARAMS", False))
|
||||
modify_params = False
|
||||
retry = True
|
||||
### AUTH ###
|
||||
|
@ -605,6 +605,7 @@ provider_list: List = [
|
|||
"together_ai",
|
||||
"openrouter",
|
||||
"vertex_ai",
|
||||
"vertex_ai_beta",
|
||||
"palm",
|
||||
"gemini",
|
||||
"ai21",
|
||||
|
@ -765,7 +766,8 @@ from .llms.gemini import GeminiConfig
|
|||
from .llms.nlp_cloud import NLPCloudConfig
|
||||
from .llms.aleph_alpha import AlephAlphaConfig
|
||||
from .llms.petals import PetalsConfig
|
||||
from .llms.vertex_ai import VertexAIConfig
|
||||
from .llms.vertex_httpx import VertexGeminiConfig
|
||||
from .llms.vertex_ai import VertexAIConfig, VertexAITextEmbeddingConfig
|
||||
from .llms.vertex_ai_anthropic import VertexAIAnthropicConfig
|
||||
from .llms.sagemaker import SagemakerConfig
|
||||
from .llms.ollama import OllamaConfig
|
||||
|
@ -787,7 +789,9 @@ from .llms.openai import (
|
|||
OpenAIConfig,
|
||||
OpenAITextCompletionConfig,
|
||||
MistralConfig,
|
||||
MistralEmbeddingConfig,
|
||||
DeepInfraConfig,
|
||||
AzureAIStudioConfig,
|
||||
)
|
||||
from .llms.azure import (
|
||||
AzureOpenAIConfig,
|
||||
|
|
|
@ -1192,7 +1192,7 @@ class S3Cache(BaseCache):
|
|||
return cached_response
|
||||
except botocore.exceptions.ClientError as e:
|
||||
if e.response["Error"]["Code"] == "NoSuchKey":
|
||||
verbose_logger.error(
|
||||
verbose_logger.debug(
|
||||
f"S3 Cache: The specified key '{key}' does not exist in the S3 bucket."
|
||||
)
|
||||
return None
|
||||
|
|
|
@ -26,7 +26,7 @@ class AuthenticationError(openai.AuthenticationError): # type: ignore
|
|||
num_retries: Optional[int] = None,
|
||||
):
|
||||
self.status_code = 401
|
||||
self.message = message
|
||||
self.message = "litellm.AuthenticationError: {}".format(message)
|
||||
self.llm_provider = llm_provider
|
||||
self.model = model
|
||||
self.litellm_debug_info = litellm_debug_info
|
||||
|
@ -72,7 +72,7 @@ class NotFoundError(openai.NotFoundError): # type: ignore
|
|||
num_retries: Optional[int] = None,
|
||||
):
|
||||
self.status_code = 404
|
||||
self.message = message
|
||||
self.message = "litellm.NotFoundError: {}".format(message)
|
||||
self.model = model
|
||||
self.llm_provider = llm_provider
|
||||
self.litellm_debug_info = litellm_debug_info
|
||||
|
@ -117,7 +117,7 @@ class BadRequestError(openai.BadRequestError): # type: ignore
|
|||
num_retries: Optional[int] = None,
|
||||
):
|
||||
self.status_code = 400
|
||||
self.message = message
|
||||
self.message = "litellm.BadRequestError: {}".format(message)
|
||||
self.model = model
|
||||
self.llm_provider = llm_provider
|
||||
self.litellm_debug_info = litellm_debug_info
|
||||
|
@ -162,7 +162,7 @@ class UnprocessableEntityError(openai.UnprocessableEntityError): # type: ignore
|
|||
num_retries: Optional[int] = None,
|
||||
):
|
||||
self.status_code = 422
|
||||
self.message = message
|
||||
self.message = "litellm.UnprocessableEntityError: {}".format(message)
|
||||
self.model = model
|
||||
self.llm_provider = llm_provider
|
||||
self.litellm_debug_info = litellm_debug_info
|
||||
|
@ -204,7 +204,7 @@ class Timeout(openai.APITimeoutError): # type: ignore
|
|||
request=request
|
||||
) # Call the base class constructor with the parameters it needs
|
||||
self.status_code = 408
|
||||
self.message = message
|
||||
self.message = "litellm.Timeout: {}".format(message)
|
||||
self.model = model
|
||||
self.llm_provider = llm_provider
|
||||
self.litellm_debug_info = litellm_debug_info
|
||||
|
@ -241,7 +241,7 @@ class PermissionDeniedError(openai.PermissionDeniedError): # type:ignore
|
|||
num_retries: Optional[int] = None,
|
||||
):
|
||||
self.status_code = 403
|
||||
self.message = message
|
||||
self.message = "litellm.PermissionDeniedError: {}".format(message)
|
||||
self.llm_provider = llm_provider
|
||||
self.model = model
|
||||
self.litellm_debug_info = litellm_debug_info
|
||||
|
@ -280,7 +280,7 @@ class RateLimitError(openai.RateLimitError): # type: ignore
|
|||
num_retries: Optional[int] = None,
|
||||
):
|
||||
self.status_code = 429
|
||||
self.message = message
|
||||
self.message = "litellm.RateLimitError: {}".format(message)
|
||||
self.llm_provider = llm_provider
|
||||
self.model = model
|
||||
self.litellm_debug_info = litellm_debug_info
|
||||
|
@ -328,7 +328,7 @@ class ContextWindowExceededError(BadRequestError): # type: ignore
|
|||
litellm_debug_info: Optional[str] = None,
|
||||
):
|
||||
self.status_code = 400
|
||||
self.message = message
|
||||
self.message = "litellm.ContextWindowExceededError: {}".format(message)
|
||||
self.model = model
|
||||
self.llm_provider = llm_provider
|
||||
self.litellm_debug_info = litellm_debug_info
|
||||
|
@ -368,7 +368,7 @@ class RejectedRequestError(BadRequestError): # type: ignore
|
|||
litellm_debug_info: Optional[str] = None,
|
||||
):
|
||||
self.status_code = 400
|
||||
self.message = message
|
||||
self.message = "litellm.RejectedRequestError: {}".format(message)
|
||||
self.model = model
|
||||
self.llm_provider = llm_provider
|
||||
self.litellm_debug_info = litellm_debug_info
|
||||
|
@ -411,7 +411,7 @@ class ContentPolicyViolationError(BadRequestError): # type: ignore
|
|||
litellm_debug_info: Optional[str] = None,
|
||||
):
|
||||
self.status_code = 400
|
||||
self.message = message
|
||||
self.message = "litellm.ContentPolicyViolationError: {}".format(message)
|
||||
self.model = model
|
||||
self.llm_provider = llm_provider
|
||||
self.litellm_debug_info = litellm_debug_info
|
||||
|
@ -452,7 +452,7 @@ class ServiceUnavailableError(openai.APIStatusError): # type: ignore
|
|||
num_retries: Optional[int] = None,
|
||||
):
|
||||
self.status_code = 503
|
||||
self.message = message
|
||||
self.message = "litellm.ServiceUnavailableError: {}".format(message)
|
||||
self.llm_provider = llm_provider
|
||||
self.model = model
|
||||
self.litellm_debug_info = litellm_debug_info
|
||||
|
@ -501,7 +501,7 @@ class InternalServerError(openai.InternalServerError): # type: ignore
|
|||
num_retries: Optional[int] = None,
|
||||
):
|
||||
self.status_code = 500
|
||||
self.message = message
|
||||
self.message = "litellm.InternalServerError: {}".format(message)
|
||||
self.llm_provider = llm_provider
|
||||
self.model = model
|
||||
self.litellm_debug_info = litellm_debug_info
|
||||
|
@ -552,7 +552,7 @@ class APIError(openai.APIError): # type: ignore
|
|||
num_retries: Optional[int] = None,
|
||||
):
|
||||
self.status_code = status_code
|
||||
self.message = message
|
||||
self.message = "litellm.APIError: {}".format(message)
|
||||
self.llm_provider = llm_provider
|
||||
self.model = model
|
||||
self.litellm_debug_info = litellm_debug_info
|
||||
|
@ -589,7 +589,7 @@ class APIConnectionError(openai.APIConnectionError): # type: ignore
|
|||
max_retries: Optional[int] = None,
|
||||
num_retries: Optional[int] = None,
|
||||
):
|
||||
self.message = message
|
||||
self.message = "litellm.APIConnectionError: {}".format(message)
|
||||
self.llm_provider = llm_provider
|
||||
self.model = model
|
||||
self.status_code = 500
|
||||
|
@ -626,7 +626,7 @@ class APIResponseValidationError(openai.APIResponseValidationError): # type: ig
|
|||
max_retries: Optional[int] = None,
|
||||
num_retries: Optional[int] = None,
|
||||
):
|
||||
self.message = message
|
||||
self.message = "litellm.APIResponseValidationError: {}".format(message)
|
||||
self.llm_provider = llm_provider
|
||||
self.model = model
|
||||
request = httpx.Request(method="POST", url="https://api.openai.com/v1")
|
||||
|
|
|
@ -8,6 +8,7 @@ import traceback
|
|||
import datetime, subprocess, sys
|
||||
import litellm, uuid
|
||||
from litellm._logging import print_verbose, verbose_logger
|
||||
from typing import Optional, Union
|
||||
|
||||
|
||||
class PrometheusLogger:
|
||||
|
@ -17,33 +18,76 @@ class PrometheusLogger:
|
|||
**kwargs,
|
||||
):
|
||||
try:
|
||||
from prometheus_client import Counter
|
||||
from prometheus_client import Counter, Gauge
|
||||
|
||||
self.litellm_llm_api_failed_requests_metric = Counter(
|
||||
name="litellm_llm_api_failed_requests_metric",
|
||||
documentation="Total number of failed LLM API calls via litellm",
|
||||
labelnames=["end_user", "hashed_api_key", "model", "team", "user"],
|
||||
labelnames=[
|
||||
"end_user",
|
||||
"hashed_api_key",
|
||||
"model",
|
||||
"team",
|
||||
"team_alias",
|
||||
"user",
|
||||
],
|
||||
)
|
||||
|
||||
self.litellm_requests_metric = Counter(
|
||||
name="litellm_requests_metric",
|
||||
documentation="Total number of LLM calls to litellm",
|
||||
labelnames=["end_user", "hashed_api_key", "model", "team", "user"],
|
||||
labelnames=[
|
||||
"end_user",
|
||||
"hashed_api_key",
|
||||
"model",
|
||||
"team",
|
||||
"team_alias",
|
||||
"user",
|
||||
],
|
||||
)
|
||||
|
||||
# Counter for spend
|
||||
self.litellm_spend_metric = Counter(
|
||||
"litellm_spend_metric",
|
||||
"Total spend on LLM requests",
|
||||
labelnames=["end_user", "hashed_api_key", "model", "team", "user"],
|
||||
labelnames=[
|
||||
"end_user",
|
||||
"hashed_api_key",
|
||||
"model",
|
||||
"team",
|
||||
"team_alias",
|
||||
"user",
|
||||
],
|
||||
)
|
||||
|
||||
# Counter for total_output_tokens
|
||||
self.litellm_tokens_metric = Counter(
|
||||
"litellm_total_tokens",
|
||||
"Total number of input + output tokens from LLM requests",
|
||||
labelnames=["end_user", "hashed_api_key", "model", "team", "user"],
|
||||
labelnames=[
|
||||
"end_user",
|
||||
"hashed_api_key",
|
||||
"model",
|
||||
"team",
|
||||
"team_alias",
|
||||
"user",
|
||||
],
|
||||
)
|
||||
|
||||
# Remaining Budget for Team
|
||||
self.litellm_remaining_team_budget_metric = Gauge(
|
||||
"litellm_remaining_team_budget_metric",
|
||||
"Remaining budget for team",
|
||||
labelnames=["team_id", "team_alias"],
|
||||
)
|
||||
|
||||
# Remaining Budget for API Key
|
||||
self.litellm_remaining_api_key_budget_metric = Gauge(
|
||||
"litellm_remaining_api_key_budget_metric",
|
||||
"Remaining budget for api key",
|
||||
labelnames=["hashed_api_key", "api_key_alias"],
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
print_verbose(f"Got exception on init prometheus client {str(e)}")
|
||||
raise e
|
||||
|
@ -51,7 +95,9 @@ class PrometheusLogger:
|
|||
async def _async_log_event(
|
||||
self, kwargs, response_obj, start_time, end_time, print_verbose, user_id
|
||||
):
|
||||
self.log_event(kwargs, response_obj, start_time, end_time, print_verbose)
|
||||
self.log_event(
|
||||
kwargs, response_obj, start_time, end_time, user_id, print_verbose
|
||||
)
|
||||
|
||||
def log_event(
|
||||
self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
|
||||
|
@ -72,9 +118,36 @@ class PrometheusLogger:
|
|||
"user_api_key_user_id", None
|
||||
)
|
||||
user_api_key = litellm_params.get("metadata", {}).get("user_api_key", None)
|
||||
user_api_key_alias = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_alias", None
|
||||
)
|
||||
user_api_team = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_id", None
|
||||
)
|
||||
user_api_team_alias = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_alias", None
|
||||
)
|
||||
|
||||
_team_spend = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_spend", None
|
||||
)
|
||||
_team_max_budget = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_max_budget", None
|
||||
)
|
||||
_remaining_team_budget = safe_get_remaining_budget(
|
||||
max_budget=_team_max_budget, spend=_team_spend
|
||||
)
|
||||
|
||||
_api_key_spend = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_spend", None
|
||||
)
|
||||
_api_key_max_budget = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_max_budget", None
|
||||
)
|
||||
_remaining_api_key_budget = safe_get_remaining_budget(
|
||||
max_budget=_api_key_max_budget, spend=_api_key_spend
|
||||
)
|
||||
|
||||
if response_obj is not None:
|
||||
tokens_used = response_obj.get("usage", {}).get("total_tokens", 0)
|
||||
else:
|
||||
|
@ -94,19 +167,47 @@ class PrometheusLogger:
|
|||
user_api_key = hash_token(user_api_key)
|
||||
|
||||
self.litellm_requests_metric.labels(
|
||||
end_user_id, user_api_key, model, user_api_team, user_id
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
model,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
user_id,
|
||||
).inc()
|
||||
self.litellm_spend_metric.labels(
|
||||
end_user_id, user_api_key, model, user_api_team, user_id
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
model,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
user_id,
|
||||
).inc(response_cost)
|
||||
self.litellm_tokens_metric.labels(
|
||||
end_user_id, user_api_key, model, user_api_team, user_id
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
model,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
user_id,
|
||||
).inc(tokens_used)
|
||||
|
||||
self.litellm_remaining_team_budget_metric.labels(
|
||||
user_api_team, user_api_team_alias
|
||||
).set(_remaining_team_budget)
|
||||
|
||||
self.litellm_remaining_api_key_budget_metric.labels(
|
||||
user_api_key, user_api_key_alias
|
||||
).set(_remaining_api_key_budget)
|
||||
|
||||
### FAILURE INCREMENT ###
|
||||
if "exception" in kwargs:
|
||||
self.litellm_llm_api_failed_requests_metric.labels(
|
||||
end_user_id, user_api_key, model, user_api_team, user_id
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
model,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
user_id,
|
||||
).inc()
|
||||
except Exception as e:
|
||||
verbose_logger.error(
|
||||
|
@ -114,3 +215,15 @@ class PrometheusLogger:
|
|||
)
|
||||
verbose_logger.debug(traceback.format_exc())
|
||||
pass
|
||||
|
||||
|
||||
def safe_get_remaining_budget(
|
||||
max_budget: Optional[float], spend: Optional[float]
|
||||
) -> float:
|
||||
if max_budget is None:
|
||||
return float("inf")
|
||||
|
||||
if spend is None:
|
||||
return max_budget
|
||||
|
||||
return max_budget - spend
|
||||
|
|
65 litellm/litellm_core_utils/redact_messages.py (new file)
|
@ -0,0 +1,65 @@
|
|||
# +-----------------------------------------------+
|
||||
# | |
|
||||
# | Give Feedback / Get Help |
|
||||
# | https://github.com/BerriAI/litellm/issues/new |
|
||||
# | |
|
||||
# +-----------------------------------------------+
|
||||
#
|
||||
# Thank you users! We ❤️ you! - Krrish & Ishaan
|
||||
|
||||
import copy
|
||||
from typing import TYPE_CHECKING, Any
|
||||
import litellm
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from litellm.utils import Logging as _LiteLLMLoggingObject
|
||||
|
||||
LiteLLMLoggingObject = _LiteLLMLoggingObject
|
||||
else:
|
||||
LiteLLMLoggingObject = Any
|
||||
|
||||
|
||||
def redact_message_input_output_from_logging(
|
||||
litellm_logging_obj: LiteLLMLoggingObject, result
|
||||
):
|
||||
"""
|
||||
Removes messages, prompts, input, response from logging. This modifies the data in-place
|
||||
only redacts when litellm.turn_off_message_logging == True
|
||||
"""
|
||||
# check if user opted out of logging message/response to callbacks
|
||||
if litellm.turn_off_message_logging is not True:
|
||||
return result
|
||||
|
||||
_result = copy.deepcopy(result)
|
||||
# remove messages, prompts, input, response from logging
|
||||
litellm_logging_obj.model_call_details["messages"] = [
|
||||
{"role": "user", "content": "redacted-by-litellm"}
|
||||
]
|
||||
litellm_logging_obj.model_call_details["prompt"] = ""
|
||||
litellm_logging_obj.model_call_details["input"] = ""
|
||||
|
||||
# response cleaning
|
||||
# ChatCompletion Responses
|
||||
if (
|
||||
litellm_logging_obj.stream is True
|
||||
and "complete_streaming_response" in litellm_logging_obj.model_call_details
|
||||
):
|
||||
_streaming_response = litellm_logging_obj.model_call_details[
|
||||
"complete_streaming_response"
|
||||
]
|
||||
for choice in _streaming_response.choices:
|
||||
if isinstance(choice, litellm.Choices):
|
||||
choice.message.content = "redacted-by-litellm"
|
||||
elif isinstance(choice, litellm.utils.StreamingChoices):
|
||||
choice.delta.content = "redacted-by-litellm"
|
||||
else:
|
||||
if _result is not None:
|
||||
if isinstance(_result, litellm.ModelResponse):
|
||||
if hasattr(_result, "choices") and _result.choices is not None:
|
||||
for choice in _result.choices:
|
||||
if isinstance(choice, litellm.Choices):
|
||||
choice.message.content = "redacted-by-litellm"
|
||||
elif isinstance(choice, litellm.utils.StreamingChoices):
|
||||
choice.delta.content = "redacted-by-litellm"
|
||||
|
||||
return _result
|
|
@ -36,6 +36,9 @@ from ..types.llms.openai import (
|
|||
AsyncAssistantStreamManager,
|
||||
AssistantStreamManager,
|
||||
)
|
||||
from litellm.caching import DualCache
|
||||
|
||||
azure_ad_cache = DualCache()
|
||||
|
||||
|
||||
class AzureOpenAIError(Exception):
|
||||
|
@ -309,9 +312,10 @@ def select_azure_base_url_or_endpoint(azure_client_params: dict):
|
|||
|
||||
def get_azure_ad_token_from_oidc(azure_ad_token: str):
|
||||
azure_client_id = os.getenv("AZURE_CLIENT_ID", None)
|
||||
azure_tenant = os.getenv("AZURE_TENANT_ID", None)
|
||||
azure_tenant_id = os.getenv("AZURE_TENANT_ID", None)
|
||||
azure_authority_host = os.getenv("AZURE_AUTHORITY_HOST", "https://login.microsoftonline.com")
|
||||
|
||||
if azure_client_id is None or azure_tenant is None:
|
||||
if azure_client_id is None or azure_tenant_id is None:
|
||||
raise AzureOpenAIError(
|
||||
status_code=422,
|
||||
message="AZURE_CLIENT_ID and AZURE_TENANT_ID must be set",
|
||||
|
@ -325,8 +329,19 @@ def get_azure_ad_token_from_oidc(azure_ad_token: str):
|
|||
message="OIDC token could not be retrieved from secret manager.",
|
||||
)
|
||||
|
||||
azure_ad_token_cache_key = json.dumps({
|
||||
"azure_client_id": azure_client_id,
|
||||
"azure_tenant_id": azure_tenant_id,
|
||||
"azure_authority_host": azure_authority_host,
|
||||
"oidc_token": oidc_token,
|
||||
})
|
||||
|
||||
azure_ad_token_access_token = azure_ad_cache.get_cache(azure_ad_token_cache_key)
|
||||
if azure_ad_token_access_token is not None:
|
||||
return azure_ad_token_access_token
|
||||
|
||||
req_token = httpx.post(
|
||||
f"https://login.microsoftonline.com/{azure_tenant}/oauth2/v2.0/token",
|
||||
f"{azure_authority_host}/{azure_tenant_id}/oauth2/v2.0/token",
|
||||
data={
|
||||
"client_id": azure_client_id,
|
||||
"grant_type": "client_credentials",
|
||||
|
@ -342,12 +357,23 @@ def get_azure_ad_token_from_oidc(azure_ad_token: str):
|
|||
message=req_token.text,
|
||||
)
|
||||
|
||||
possible_azure_ad_token = req_token.json().get("access_token", None)
|
||||
azure_ad_token_json = req_token.json()
|
||||
azure_ad_token_access_token = azure_ad_token_json.get("access_token", None)
|
||||
azure_ad_token_expires_in = azure_ad_token_json.get("expires_in", None)
|
||||
|
||||
if possible_azure_ad_token is None:
|
||||
raise AzureOpenAIError(status_code=422, message="Azure AD Token not returned")
|
||||
if azure_ad_token_access_token is None:
|
||||
raise AzureOpenAIError(
|
||||
status_code=422, message="Azure AD Token access_token not returned"
|
||||
)
|
||||
|
||||
return possible_azure_ad_token
|
||||
if azure_ad_token_expires_in is None:
|
||||
raise AzureOpenAIError(
|
||||
status_code=422, message="Azure AD Token expires_in not returned"
|
||||
)
|
||||
|
||||
azure_ad_cache.set_cache(key=azure_ad_token_cache_key, value=azure_ad_token_access_token, ttl=azure_ad_token_expires_in)
|
||||
|
||||
return azure_ad_token_access_token
|
||||
|
||||
|
||||
class AzureChatCompletion(BaseLLM):
|
||||
|
|
|
@ -53,7 +53,9 @@ from litellm.types.llms.openai import (
|
|||
ChatCompletionToolCallFunctionChunk,
|
||||
ChatCompletionDeltaChunk,
|
||||
)
|
||||
from litellm.caching import DualCache
|
||||
|
||||
iam_cache = DualCache()
|
||||
|
||||
class AmazonCohereChatConfig:
|
||||
"""
|
||||
|
@ -325,11 +327,16 @@ class BedrockLLM(BaseLLM):
|
|||
) = params_to_check
|
||||
|
||||
### CHECK STS ###
|
||||
if (
|
||||
aws_web_identity_token is not None
|
||||
and aws_role_name is not None
|
||||
and aws_session_name is not None
|
||||
):
|
||||
if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
|
||||
iam_creds_cache_key = json.dumps({
|
||||
"aws_web_identity_token": aws_web_identity_token,
|
||||
"aws_role_name": aws_role_name,
|
||||
"aws_session_name": aws_session_name,
|
||||
"aws_region_name": aws_region_name,
|
||||
})
|
||||
|
||||
iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
|
||||
if iam_creds_dict is None:
|
||||
oidc_token = get_secret(aws_web_identity_token)
|
||||
|
||||
if oidc_token is None:
|
||||
|
@ -338,7 +345,11 @@ class BedrockLLM(BaseLLM):
|
|||
status_code=401,
|
||||
)
|
||||
|
||||
sts_client = boto3.client("sts")
|
||||
sts_client = boto3.client(
|
||||
"sts",
|
||||
region_name=aws_region_name,
|
||||
endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com"
|
||||
)
|
||||
|
||||
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
|
||||
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
|
||||
|
@ -349,14 +360,20 @@ class BedrockLLM(BaseLLM):
|
|||
DurationSeconds=3600,
|
||||
)
|
||||
|
||||
session = boto3.Session(
|
||||
aws_access_key_id=sts_response["Credentials"]["AccessKeyId"],
|
||||
aws_secret_access_key=sts_response["Credentials"]["SecretAccessKey"],
|
||||
aws_session_token=sts_response["Credentials"]["SessionToken"],
|
||||
region_name=aws_region_name,
|
||||
)
|
||||
iam_creds_dict = {
|
||||
"aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
|
||||
"aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
|
||||
"aws_session_token": sts_response["Credentials"]["SessionToken"],
|
||||
"region_name": aws_region_name,
|
||||
}
|
||||
|
||||
return session.get_credentials()
|
||||
iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)
|
||||
|
||||
session = boto3.Session(**iam_creds_dict)
|
||||
|
||||
iam_creds = session.get_credentials()
|
||||
|
||||
return iam_creds
|
||||
elif aws_role_name is not None and aws_session_name is not None:
|
||||
sts_client = boto3.client(
|
||||
"sts",
|
||||
|
@ -1416,11 +1433,16 @@ class BedrockConverseLLM(BaseLLM):
|
|||
) = params_to_check
|
||||
|
||||
### CHECK STS ###
|
||||
if (
|
||||
aws_web_identity_token is not None
|
||||
and aws_role_name is not None
|
||||
and aws_session_name is not None
|
||||
):
|
||||
if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
|
||||
iam_creds_cache_key = json.dumps({
|
||||
"aws_web_identity_token": aws_web_identity_token,
|
||||
"aws_role_name": aws_role_name,
|
||||
"aws_session_name": aws_session_name,
|
||||
"aws_region_name": aws_region_name,
|
||||
})
|
||||
|
||||
iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
|
||||
if iam_creds_dict is None:
|
||||
oidc_token = get_secret(aws_web_identity_token)
|
||||
|
||||
if oidc_token is None:
|
||||
|
@ -1429,7 +1451,11 @@ class BedrockConverseLLM(BaseLLM):
|
|||
status_code=401,
|
||||
)
|
||||
|
||||
sts_client = boto3.client("sts")
|
||||
sts_client = boto3.client(
|
||||
"sts",
|
||||
region_name=aws_region_name,
|
||||
endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com"
|
||||
)
|
||||
|
||||
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
|
||||
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
|
||||
|
@ -1440,14 +1466,20 @@ class BedrockConverseLLM(BaseLLM):
|
|||
DurationSeconds=3600,
|
||||
)
|
||||
|
||||
session = boto3.Session(
|
||||
aws_access_key_id=sts_response["Credentials"]["AccessKeyId"],
|
||||
aws_secret_access_key=sts_response["Credentials"]["SecretAccessKey"],
|
||||
aws_session_token=sts_response["Credentials"]["SessionToken"],
|
||||
region_name=aws_region_name,
|
||||
)
|
||||
iam_creds_dict = {
|
||||
"aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
|
||||
"aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
|
||||
"aws_session_token": sts_response["Credentials"]["SessionToken"],
|
||||
"region_name": aws_region_name,
|
||||
}
|
||||
|
||||
return session.get_credentials()
|
||||
iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)
|
||||
|
||||
session = boto3.Session(**iam_creds_dict)
|
||||
|
||||
iam_creds = session.get_credentials()
|
||||
|
||||
return iam_creds
|
||||
elif aws_role_name is not None and aws_session_name is not None:
|
||||
sts_client = boto3.client(
|
||||
"sts",
|
||||
|
|
|
@ -139,6 +139,7 @@ def process_response(
|
|||
|
||||
def convert_model_to_url(model: str, api_base: str):
|
||||
user_id, app_id, model_id = model.split(".")
|
||||
model_id = model_id.lower()
|
||||
return f"{api_base}/users/{user_id}/apps/{app_id}/models/{model_id}/outputs"
|
||||
|
||||
|
||||
|
@ -171,19 +172,55 @@ async def async_completion(
|
|||
|
||||
async_handler = AsyncHTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
|
||||
response = await async_handler.post(
|
||||
api_base, headers=headers, data=json.dumps(data)
|
||||
url=model, headers=headers, data=json.dumps(data)
|
||||
)
|
||||
|
||||
return process_response(
|
||||
model=model,
|
||||
prompt=prompt,
|
||||
response=response,
|
||||
model_response=model_response,
|
||||
logging_obj.post_call(
|
||||
input=prompt,
|
||||
api_key=api_key,
|
||||
data=data,
|
||||
encoding=encoding,
|
||||
logging_obj=logging_obj,
|
||||
original_response=response.text,
|
||||
additional_args={"complete_input_dict": data},
|
||||
)
|
||||
## RESPONSE OBJECT
|
||||
try:
|
||||
completion_response = response.json()
|
||||
except Exception:
|
||||
raise ClarifaiError(
|
||||
message=response.text, status_code=response.status_code, url=model
|
||||
)
|
||||
# print(completion_response)
|
||||
try:
|
||||
choices_list = []
|
||||
for idx, item in enumerate(completion_response["outputs"]):
|
||||
if len(item["data"]["text"]["raw"]) > 0:
|
||||
message_obj = Message(content=item["data"]["text"]["raw"])
|
||||
else:
|
||||
message_obj = Message(content=None)
|
||||
choice_obj = Choices(
|
||||
finish_reason="stop",
|
||||
index=idx + 1, # check
|
||||
message=message_obj,
|
||||
)
|
||||
choices_list.append(choice_obj)
|
||||
model_response["choices"] = choices_list
|
||||
|
||||
except Exception as e:
|
||||
raise ClarifaiError(
|
||||
message=traceback.format_exc(), status_code=response.status_code, url=model
|
||||
)
|
||||
|
||||
# Calculate Usage
|
||||
prompt_tokens = len(encoding.encode(prompt))
|
||||
completion_tokens = len(
|
||||
encoding.encode(model_response["choices"][0]["message"].get("content"))
|
||||
)
|
||||
model_response["model"] = model
|
||||
model_response["usage"] = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
)
|
||||
return model_response
|
||||
|
||||
|
||||
def completion(
|
||||
|
@ -241,7 +278,7 @@ def completion(
|
|||
additional_args={
|
||||
"complete_input_dict": data,
|
||||
"headers": headers,
|
||||
"api_base": api_base,
|
||||
"api_base": model,
|
||||
},
|
||||
)
|
||||
if acompletion == True:
|
||||
|
|
|
@ -12,6 +12,15 @@ class AsyncHTTPHandler:
|
|||
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
||||
concurrent_limit=1000,
|
||||
):
|
||||
self.timeout = timeout
|
||||
self.client = self.create_client(
|
||||
timeout=timeout, concurrent_limit=concurrent_limit
|
||||
)
|
||||
|
||||
def create_client(
|
||||
self, timeout: Optional[Union[float, httpx.Timeout]], concurrent_limit: int
|
||||
) -> httpx.AsyncClient:
|
||||
|
||||
async_proxy_mounts = None
|
||||
# Check if the HTTP_PROXY and HTTPS_PROXY environment variables are set and use them accordingly.
|
||||
http_proxy = os.getenv("HTTP_PROXY", None)
|
||||
|
@ -39,7 +48,8 @@ class AsyncHTTPHandler:
|
|||
if timeout is None:
|
||||
timeout = _DEFAULT_TIMEOUT
|
||||
# Create a client with a connection pool
|
||||
self.client = httpx.AsyncClient(
|
||||
|
||||
return httpx.AsyncClient(
|
||||
timeout=timeout,
|
||||
limits=httpx.Limits(
|
||||
max_connections=concurrent_limit,
|
||||
|
@ -83,11 +93,48 @@ class AsyncHTTPHandler:
|
|||
response = await self.client.send(req, stream=stream)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
except httpx.RemoteProtocolError:
|
||||
# Retry the request with a new session if there is a connection error
|
||||
new_client = self.create_client(timeout=self.timeout, concurrent_limit=1)
|
||||
try:
|
||||
return await self.single_connection_post_request(
|
||||
url=url,
|
||||
client=new_client,
|
||||
data=data,
|
||||
json=json,
|
||||
params=params,
|
||||
headers=headers,
|
||||
stream=stream,
|
||||
)
|
||||
finally:
|
||||
await new_client.aclose()
|
||||
except httpx.HTTPStatusError as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
async def single_connection_post_request(
|
||||
self,
|
||||
url: str,
|
||||
client: httpx.AsyncClient,
|
||||
data: Optional[Union[dict, str]] = None, # type: ignore
|
||||
json: Optional[dict] = None,
|
||||
params: Optional[dict] = None,
|
||||
headers: Optional[dict] = None,
|
||||
stream: bool = False,
|
||||
):
|
||||
"""
|
||||
Making POST request for a single connection client.
|
||||
|
||||
Used for retrying connection client errors.
|
||||
"""
|
||||
req = client.build_request(
|
||||
"POST", url, data=data, json=json, params=params, headers=headers # type: ignore
|
||||
)
|
||||
response = await client.send(req, stream=stream)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
|
||||
def __del__(self) -> None:
|
||||
try:
|
||||
asyncio.get_running_loop().create_task(self.close())
|
||||
|
|
|
@@ -28,6 +28,7 @@ from .prompt_templates.factory import prompt_factory, custom_prompt
from openai import OpenAI, AsyncOpenAI
from ..types.llms.openai import *
import openai
from litellm.types.utils import ProviderField


class OpenAIError(Exception):

@@ -164,6 +165,68 @@ class MistralConfig:
        return optional_params


class MistralEmbeddingConfig:
    """
    Reference: https://docs.mistral.ai/api/#operation/createEmbedding
    """

    def __init__(
        self,
    ) -> None:
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def get_supported_openai_params(self):
        return [
            "encoding_format",
        ]

    def map_openai_params(self, non_default_params: dict, optional_params: dict):
        for param, value in non_default_params.items():
            if param == "encoding_format":
                optional_params["encoding_format"] = value
        return optional_params


class AzureAIStudioConfig:
    def get_required_params(self) -> List[ProviderField]:
        """For a given provider, return it's required fields with a description"""
        return [
            ProviderField(
                field_name="api_key",
                field_type="string",
                field_description="Your Azure AI Studio API Key.",
                field_value="zEJ...",
            ),
            ProviderField(
                field_name="api_base",
                field_type="string",
                field_description="Your Azure AI Studio API Base.",
                field_value="https://Mistral-serverless.",
            ),
        ]


class DeepInfraConfig:
    """
    Reference: https://deepinfra.com/docs/advanced/openai_api
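A small sketch of how the new Mistral embedding config filters OpenAI params (the module path `litellm.llms.openai` is assumed from the hunk header):

```python
# Only "encoding_format" is whitelisted by MistralEmbeddingConfig above;
# anything else is left out of the mapped params.
from litellm.llms.openai import MistralEmbeddingConfig  # path assumed

config = MistralEmbeddingConfig()
mapped = config.map_openai_params(
    non_default_params={"encoding_format": "float", "user": "abc-123"},
    optional_params={},
)
print(mapped)  # -> {'encoding_format': 'float'}
```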
@@ -4,6 +4,7 @@ from enum import Enum
import requests  # type: ignore
import time
from typing import Callable, Optional, Union, List, Literal, Any
from pydantic import BaseModel
from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
import litellm, uuid
import httpx, inspect  # type: ignore

@@ -12,7 +13,12 @@ from litellm.llms.prompt_templates.factory import (
    convert_to_gemini_tool_call_result,
    convert_to_gemini_tool_call_invoke,
)
from litellm.types.files import get_file_mime_type_for_file_type, get_file_type_from_extension, is_gemini_1_5_accepted_file_type, is_video_file_type
from litellm.types.files import (
    get_file_mime_type_for_file_type,
    get_file_type_from_extension,
    is_gemini_1_5_accepted_file_type,
    is_video_file_type,
)


class VertexAIError(Exception):

@@ -611,7 +617,7 @@ def completion(
    llm_model = None

    # NOTE: async prediction and streaming under "private" mode isn't supported by aiplatform right now
    if acompletion == True:
    if acompletion is True:
        data = {
            "llm_model": llm_model,
            "mode": mode,

@@ -643,7 +649,7 @@ def completion(
            tools = optional_params.pop("tools", None)
            content = _gemini_convert_messages_with_history(messages=messages)
            stream = optional_params.pop("stream", False)
            if stream == True:
            if stream is True:
                request_str += f"response = llm_model.generate_content({content}, generation_config=GenerationConfig(**{optional_params}), safety_settings={safety_settings}, stream={stream})\n"
                logging_obj.pre_call(
                    input=prompt,
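The `== True` → `is True` changes above tighten the checks to the boolean singleton; a quick illustration of the difference:

```python
stream = 1              # truthy, but not the bool True
print(stream == True)   # True  (equality coerces)
print(stream is True)   # False (identity check, what the code now requires)
```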
@@ -1293,6 +1299,95 @@ async def async_streaming(
    return streamwrapper


class VertexAITextEmbeddingConfig(BaseModel):
    """
    Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#TextEmbeddingInput

    Args:
        auto_truncate: Optional(bool) If True, will truncate input text to fit within the model's max input length.
        task_type: Optional(str) The type of task to be performed. The default is "RETRIEVAL_QUERY".
        title: Optional(str) The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
    """

    auto_truncate: Optional[bool] = None
    task_type: Optional[
        Literal[
            "RETRIEVAL_QUERY",
            "RETRIEVAL_DOCUMENT",
            "SEMANTIC_SIMILARITY",
            "CLASSIFICATION",
            "CLUSTERING",
            "QUESTION_ANSWERING",
            "FACT_VERIFICATION",
        ]
    ] = None
    title: Optional[str] = None

    def __init__(
        self,
        auto_truncate: Optional[bool] = None,
        task_type: Optional[
            Literal[
                "RETRIEVAL_QUERY",
                "RETRIEVAL_DOCUMENT",
                "SEMANTIC_SIMILARITY",
                "CLASSIFICATION",
                "CLUSTERING",
                "QUESTION_ANSWERING",
                "FACT_VERIFICATION",
            ]
        ] = None,
        title: Optional[str] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def get_supported_openai_params(self):
        return [
            "dimensions",
        ]

    def map_openai_params(self, non_default_params: dict, optional_params: dict):
        for param, value in non_default_params.items():
            if param == "dimensions":
                optional_params["output_dimensionality"] = value
        return optional_params

    def get_mapped_special_auth_params(self) -> dict:
        """
        Common auth params across bedrock/vertex_ai/azure/watsonx
        """
        return {"project": "vertex_project", "region_name": "vertex_location"}

    def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
        mapped_params = self.get_mapped_special_auth_params()

        for param, value in non_default_params.items():
            if param in mapped_params:
                optional_params[mapped_params[param]] = value
        return optional_params


def embedding(
    model: str,
    input: Union[list, str],
@@ -1316,7 +1411,7 @@ def embedding(
            message="vertexai import failed please run `pip install google-cloud-aiplatform`",
        )

    from vertexai.language_models import TextEmbeddingModel
    from vertexai.language_models import TextEmbeddingModel, TextEmbeddingInput
    import google.auth  # type: ignore

    ## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744

@@ -1347,6 +1442,16 @@ def embedding(
    if isinstance(input, str):
        input = [input]

    if optional_params is not None and isinstance(optional_params, dict):
        if optional_params.get("task_type") or optional_params.get("title"):
            # if user passed task_type or title, cast to TextEmbeddingInput
            _task_type = optional_params.pop("task_type", None)
            _title = optional_params.pop("title", None)
            input = [
                TextEmbeddingInput(text=x, task_type=_task_type, title=_title)
                for x in input
            ]

    try:
        llm_model = TextEmbeddingModel.from_pretrained(model)
    except Exception as e:
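Two of the new embedding behaviours above, sketched standalone (the config import path is an assumption; `TextEmbeddingInput` comes from `google-cloud-aiplatform`, as in the hunk):

```python
from vertexai.language_models import TextEmbeddingInput
from litellm.llms.vertex_ai import VertexAITextEmbeddingConfig  # path assumed

# 1) OpenAI's "dimensions" is mapped to Vertex AI's "output_dimensionality".
mapped = VertexAITextEmbeddingConfig().map_openai_params(
    non_default_params={"dimensions": 256}, optional_params={}
)
print(mapped)  # -> {'output_dimensionality': 256}

# 2) task_type/title are popped from optional params and wrapped per input text.
optional_params = {"task_type": "RETRIEVAL_DOCUMENT", "title": "LiteLLM Proxy"}
_task_type = optional_params.pop("task_type", None)
_title = optional_params.pop("title", None)
inputs = [
    TextEmbeddingInput(text=x, task_type=_task_type, title=_title)
    for x in ["litellm proxy docs"]
]
```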
@ -1363,7 +1468,8 @@ def embedding(
|
|||
encoding=encoding,
|
||||
)
|
||||
|
||||
request_str = f"""embeddings = llm_model.get_embeddings({input})"""
|
||||
_input_dict = {"texts": input, **optional_params}
|
||||
request_str = f"""embeddings = llm_model.get_embeddings({_input_dict})"""
|
||||
## LOGGING PRE-CALL
|
||||
logging_obj.pre_call(
|
||||
input=input,
|
||||
|
@ -1375,7 +1481,7 @@ def embedding(
|
|||
)
|
||||
|
||||
try:
|
||||
embeddings = llm_model.get_embeddings(input)
|
||||
embeddings = llm_model.get_embeddings(**_input_dict)
|
||||
except Exception as e:
|
||||
raise VertexAIError(status_code=500, message=str(e))
|
||||
|
||||
|
@ -1383,6 +1489,7 @@ def embedding(
|
|||
logging_obj.post_call(input=input, api_key=None, original_response=embeddings)
|
||||
## Populate OpenAI compliant dictionary
|
||||
embedding_response = []
|
||||
input_tokens: int = 0
|
||||
for idx, embedding in enumerate(embeddings):
|
||||
embedding_response.append(
|
||||
{
|
||||
|
@ -1391,14 +1498,10 @@ def embedding(
|
|||
"embedding": embedding.values,
|
||||
}
|
||||
)
|
||||
input_tokens += embedding.statistics.token_count
|
||||
model_response["object"] = "list"
|
||||
model_response["data"] = embedding_response
|
||||
model_response["model"] = model
|
||||
input_tokens = 0
|
||||
|
||||
input_str = "".join(input)
|
||||
|
||||
input_tokens += len(encoding.encode(input_str))
|
||||
|
||||
usage = Usage(
|
||||
prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
|
||||
|
@ -1420,7 +1523,8 @@ async def async_embedding(
|
|||
"""
|
||||
Async embedding implementation
|
||||
"""
|
||||
request_str = f"""embeddings = llm_model.get_embeddings({input})"""
|
||||
_input_dict = {"texts": input, **optional_params}
|
||||
request_str = f"""embeddings = llm_model.get_embeddings({_input_dict})"""
|
||||
## LOGGING PRE-CALL
|
||||
logging_obj.pre_call(
|
||||
input=input,
|
||||
|
@ -1432,7 +1536,7 @@ async def async_embedding(
|
|||
)
|
||||
|
||||
try:
|
||||
embeddings = await client.get_embeddings_async(input)
|
||||
embeddings = await client.get_embeddings_async(**_input_dict)
|
||||
except Exception as e:
|
||||
raise VertexAIError(status_code=500, message=str(e))
|
||||
|
||||
|
@ -1440,6 +1544,7 @@ async def async_embedding(
|
|||
logging_obj.post_call(input=input, api_key=None, original_response=embeddings)
|
||||
## Populate OpenAI compliant dictionary
|
||||
embedding_response = []
|
||||
input_tokens: int = 0
|
||||
for idx, embedding in enumerate(embeddings):
|
||||
embedding_response.append(
|
||||
{
|
||||
|
@ -1448,18 +1553,13 @@ async def async_embedding(
|
|||
"embedding": embedding.values,
|
||||
}
|
||||
)
|
||||
input_tokens += embedding.statistics.token_count
|
||||
|
||||
model_response["object"] = "list"
|
||||
model_response["data"] = embedding_response
|
||||
model_response["model"] = model
|
||||
input_tokens = 0
|
||||
|
||||
input_str = "".join(input)
|
||||
|
||||
input_tokens += len(encoding.encode(input_str))
|
||||
|
||||
usage = Usage(
|
||||
prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
|
||||
)
|
||||
model_response.usage = usage
|
||||
|
||||
return model_response
|
||||
|
|
|
@@ -1,3 +1,7 @@
# What is this?
## httpx client for vertex ai calls
## Initial implementation - covers gemini + image gen calls
from functools import partial
import os, types
import json
from enum import Enum

@@ -9,6 +13,284 @@ import litellm, uuid
import httpx, inspect  # type: ignore
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from .base import BaseLLM
from litellm.types.llms.vertex_ai import (
    ContentType,
    SystemInstructions,
    PartType,
    RequestBody,
    GenerateContentResponseBody,
    FunctionCallingConfig,
    FunctionDeclaration,
    Tools,
    ToolConfig,
    GenerationConfig,
)
from litellm.llms.vertex_ai import _gemini_convert_messages_with_history
from litellm.types.utils import GenericStreamingChunk
from litellm.types.llms.openai import (
    ChatCompletionUsageBlock,
    ChatCompletionToolCallChunk,
    ChatCompletionToolCallFunctionChunk,
    ChatCompletionResponseMessage,
)
class VertexGeminiConfig:
    """
    Reference: https://cloud.google.com/vertex-ai/docs/generative-ai/chat/test-chat-prompts
    Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference

    The class `VertexAIConfig` provides configuration for the VertexAI's API interface. Below are the parameters:

    - `temperature` (float): This controls the degree of randomness in token selection.

    - `max_output_tokens` (integer): This sets the limitation for the maximum amount of token in the text output. In this case, the default value is 256.

    - `top_p` (float): The tokens are selected from the most probable to the least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95.

    - `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40.

    - `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'.

    - `candidate_count` (int): Number of generated responses to return.

    - `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. The stop sequence will not be included as part of the response.

    - `frequency_penalty` (float): This parameter is used to penalize the model from repeating the same output. The default value is 0.0.

    - `presence_penalty` (float): This parameter is used to penalize the model from generating the same output as the input. The default value is 0.0.

    Note: Please make sure to modify the default parameters as required for your use case.
    """

    temperature: Optional[float] = None
    max_output_tokens: Optional[int] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    response_mime_type: Optional[str] = None
    candidate_count: Optional[int] = None
    stop_sequences: Optional[list] = None
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None

    def __init__(
        self,
        temperature: Optional[float] = None,
        max_output_tokens: Optional[int] = None,
        top_p: Optional[float] = None,
        top_k: Optional[int] = None,
        response_mime_type: Optional[str] = None,
        candidate_count: Optional[int] = None,
        stop_sequences: Optional[list] = None,
        frequency_penalty: Optional[float] = None,
        presence_penalty: Optional[float] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }
    def get_supported_openai_params(self):
        return [
            "temperature",
            "top_p",
            "max_tokens",
            "stream",
            "tools",
            "tool_choice",
            "response_format",
            "n",
            "stop",
        ]

    def map_tool_choice_values(
        self, model: str, tool_choice: Union[str, dict]
    ) -> Optional[ToolConfig]:
        if tool_choice == "none":
            return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="NONE"))
        elif tool_choice == "required":
            return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="ANY"))
        elif tool_choice == "auto":
            return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="AUTO"))
        elif isinstance(tool_choice, dict):
            # only supported for anthropic + mistral models - https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html
            name = tool_choice.get("function", {}).get("name", "")
            return ToolConfig(
                functionCallingConfig=FunctionCallingConfig(
                    mode="ANY", allowed_function_names=[name]
                )
            )
        else:
            raise litellm.utils.UnsupportedParamsError(
                message="VertexAI doesn't support tool_choice={}. Supported tool_choice values=['auto', 'required', json object]. To drop it from the call, set `litellm.drop_params = True.".format(
                    tool_choice
                ),
                status_code=400,
            )

    def map_openai_params(
        self,
        model: str,
        non_default_params: dict,
        optional_params: dict,
    ):
        for param, value in non_default_params.items():
            if param == "temperature":
                optional_params["temperature"] = value
            if param == "top_p":
                optional_params["top_p"] = value
            if (
                param == "stream" and value is True
            ):  # sending stream = False, can cause it to get passed unchecked and raise issues
                optional_params["stream"] = value
            if param == "n":
                optional_params["candidate_count"] = value
            if param == "stop":
                if isinstance(value, str):
                    optional_params["stop_sequences"] = [value]
                elif isinstance(value, list):
                    optional_params["stop_sequences"] = value
            if param == "max_tokens":
                optional_params["max_output_tokens"] = value
            if param == "response_format" and value["type"] == "json_object":  # type: ignore
                optional_params["response_mime_type"] = "application/json"
            if param == "frequency_penalty":
                optional_params["frequency_penalty"] = value
            if param == "presence_penalty":
                optional_params["presence_penalty"] = value
            if param == "tools" and isinstance(value, list):
                gtool_func_declarations = []
                for tool in value:
                    gtool_func_declaration = FunctionDeclaration(
                        name=tool["function"]["name"],
                        description=tool["function"].get("description", ""),
                        parameters=tool["function"].get("parameters", {}),
                    )
                    gtool_func_declarations.append(gtool_func_declaration)
                optional_params["tools"] = [
                    Tools(function_declarations=gtool_func_declarations)
                ]
            if param == "tool_choice" and (
                isinstance(value, str) or isinstance(value, dict)
            ):
                _tool_choice_value = self.map_tool_choice_values(
                    model=model, tool_choice=value  # type: ignore
                )
                if _tool_choice_value is not None:
                    optional_params["tool_choice"] = _tool_choice_value
        return optional_params

    def get_mapped_special_auth_params(self) -> dict:
        """
        Common auth params across bedrock/vertex_ai/azure/watsonx
        """
        return {"project": "vertex_project", "region_name": "vertex_location"}

    def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
        mapped_params = self.get_mapped_special_auth_params()

        for param, value in non_default_params.items():
            if param in mapped_params:
                optional_params[mapped_params[param]] = value
        return optional_params

    def get_eu_regions(self) -> List[str]:
        """
        Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations#available-regions
        """
        return [
            "europe-central2",
            "europe-north1",
            "europe-southwest1",
            "europe-west1",
            "europe-west2",
            "europe-west3",
            "europe-west4",
            "europe-west6",
            "europe-west8",
            "europe-west9",
        ]
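To make the translation above concrete, a small sketch of the OpenAI-to-Gemini parameter mapping (the module path `litellm.llms.vertex_httpx` is assumed from the file header):

```python
from litellm.llms.vertex_httpx import VertexGeminiConfig  # path assumed

optional_params = VertexGeminiConfig().map_openai_params(
    model="gemini-1.5-pro",
    non_default_params={
        "temperature": 0.1,
        "max_tokens": 256,
        "stop": ["\n\n"],
        "response_format": {"type": "json_object"},
    },
    optional_params={},
)
print(optional_params)
# -> {'temperature': 0.1, 'max_output_tokens': 256,
#     'stop_sequences': ['\n\n'], 'response_mime_type': 'application/json'}
```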
async def make_call(
|
||||
client: Optional[AsyncHTTPHandler],
|
||||
api_base: str,
|
||||
headers: dict,
|
||||
data: str,
|
||||
model: str,
|
||||
messages: list,
|
||||
logging_obj,
|
||||
):
|
||||
if client is None:
|
||||
client = AsyncHTTPHandler() # Create a new client if none provided
|
||||
|
||||
response = await client.post(api_base, headers=headers, data=data, stream=True)
|
||||
|
||||
if response.status_code != 200:
|
||||
raise VertexAIError(status_code=response.status_code, message=response.text)
|
||||
|
||||
completion_stream = ModelResponseIterator(
|
||||
streaming_response=response.aiter_bytes(chunk_size=2056)
|
||||
)
|
||||
# LOGGING
|
||||
logging_obj.post_call(
|
||||
input=messages,
|
||||
api_key="",
|
||||
original_response="first stream response received",
|
||||
additional_args={"complete_input_dict": data},
|
||||
)
|
||||
|
||||
return completion_stream
|
||||
|
||||
|
||||
def make_sync_call(
|
||||
client: Optional[HTTPHandler],
|
||||
api_base: str,
|
||||
headers: dict,
|
||||
data: str,
|
||||
model: str,
|
||||
messages: list,
|
||||
logging_obj,
|
||||
):
|
||||
if client is None:
|
||||
client = HTTPHandler() # Create a new client if none provided
|
||||
|
||||
response = client.post(api_base, headers=headers, data=data, stream=True)
|
||||
|
||||
if response.status_code != 200:
|
||||
raise VertexAIError(status_code=response.status_code, message=response.read())
|
||||
|
||||
completion_stream = ModelResponseIterator(
|
||||
streaming_response=response.iter_bytes(chunk_size=2056)
|
||||
)
|
||||
|
||||
# LOGGING
|
||||
logging_obj.post_call(
|
||||
input=messages,
|
||||
api_key="",
|
||||
original_response="first stream response received",
|
||||
additional_args={"complete_input_dict": data},
|
||||
)
|
||||
|
||||
return completion_stream
|
||||
|
||||
|
||||
class VertexAIError(Exception):
|
||||
|
@ -33,16 +315,125 @@ class VertexLLM(BaseLLM):
|
|||
self.project_id: Optional[str] = None
|
||||
self.async_handler: Optional[AsyncHTTPHandler] = None
|
||||
|
||||
def load_auth(self) -> Tuple[Any, str]:
|
||||
def _process_response(
|
||||
self,
|
||||
model: str,
|
||||
response: httpx.Response,
|
||||
model_response: ModelResponse,
|
||||
logging_obj: litellm.utils.Logging,
|
||||
optional_params: dict,
|
||||
api_key: str,
|
||||
data: Union[dict, str],
|
||||
messages: List,
|
||||
print_verbose,
|
||||
encoding,
|
||||
) -> ModelResponse:
|
||||
|
||||
## LOGGING
|
||||
logging_obj.post_call(
|
||||
input=messages,
|
||||
api_key="",
|
||||
original_response=response.text,
|
||||
additional_args={"complete_input_dict": data},
|
||||
)
|
||||
|
||||
print_verbose(f"raw model_response: {response.text}")
|
||||
|
||||
## RESPONSE OBJECT
|
||||
try:
|
||||
completion_response = GenerateContentResponseBody(**response.json()) # type: ignore
|
||||
except Exception as e:
|
||||
raise VertexAIError(
|
||||
message="Received={}, Error converting to valid response block={}. File an issue if litellm error - https://github.com/BerriAI/litellm/issues".format(
|
||||
response.text, str(e)
|
||||
),
|
||||
status_code=422,
|
||||
)
|
||||
|
||||
model_response.choices = [] # type: ignore
|
||||
|
||||
## GET MODEL ##
|
||||
model_response.model = model
|
||||
## GET TEXT ##
|
||||
chat_completion_message: ChatCompletionResponseMessage = {"role": "assistant"}
|
||||
content_str = ""
|
||||
tools: List[ChatCompletionToolCallChunk] = []
|
||||
for idx, candidate in enumerate(completion_response["candidates"]):
|
||||
if "content" not in candidate:
|
||||
continue
|
||||
|
||||
if "text" in candidate["content"]["parts"][0]:
|
||||
content_str = candidate["content"]["parts"][0]["text"]
|
||||
|
||||
if "functionCall" in candidate["content"]["parts"][0]:
|
||||
_function_chunk = ChatCompletionToolCallFunctionChunk(
|
||||
name=candidate["content"]["parts"][0]["functionCall"]["name"],
|
||||
arguments=json.dumps(
|
||||
candidate["content"]["parts"][0]["functionCall"]["args"]
|
||||
),
|
||||
)
|
||||
_tool_response_chunk = ChatCompletionToolCallChunk(
|
||||
id=f"call_{str(uuid.uuid4())}",
|
||||
type="function",
|
||||
function=_function_chunk,
|
||||
)
|
||||
tools.append(_tool_response_chunk)
|
||||
|
||||
chat_completion_message["content"] = content_str
|
||||
chat_completion_message["tool_calls"] = tools
|
||||
|
||||
choice = litellm.Choices(
|
||||
finish_reason=candidate.get("finishReason", "stop"),
|
||||
index=candidate.get("index", idx),
|
||||
message=chat_completion_message, # type: ignore
|
||||
logprobs=None,
|
||||
enhancements=None,
|
||||
)
|
||||
|
||||
model_response.choices.append(choice)
|
||||
|
||||
## GET USAGE ##
|
||||
usage = litellm.Usage(
|
||||
prompt_tokens=completion_response["usageMetadata"]["promptTokenCount"],
|
||||
completion_tokens=completion_response["usageMetadata"][
|
||||
"candidatesTokenCount"
|
||||
],
|
||||
total_tokens=completion_response["usageMetadata"]["totalTokenCount"],
|
||||
)
|
||||
|
||||
setattr(model_response, "usage", usage)
|
||||
|
||||
return model_response
|
||||
|
||||
def get_vertex_region(self, vertex_region: Optional[str]) -> str:
|
||||
return vertex_region or "us-central1"
|
||||
|
||||
def load_auth(
|
||||
self, credentials: Optional[str], project_id: Optional[str]
|
||||
) -> Tuple[Any, str]:
|
||||
from google.auth.transport.requests import Request # type: ignore[import-untyped]
|
||||
from google.auth.credentials import Credentials # type: ignore[import-untyped]
|
||||
import google.auth as google_auth
|
||||
|
||||
credentials, project_id = google_auth.default(
|
||||
if credentials is not None and isinstance(credentials, str):
|
||||
import google.oauth2.service_account
|
||||
|
||||
json_obj = json.loads(credentials)
|
||||
|
||||
creds = google.oauth2.service_account.Credentials.from_service_account_info(
|
||||
json_obj,
|
||||
scopes=["https://www.googleapis.com/auth/cloud-platform"],
|
||||
)
|
||||
|
||||
credentials.refresh(Request())
|
||||
if project_id is None:
|
||||
project_id = creds.project_id
|
||||
else:
|
||||
creds, project_id = google_auth.default(
|
||||
quota_project_id=project_id,
|
||||
scopes=["https://www.googleapis.com/auth/cloud-platform"],
|
||||
)
|
||||
|
||||
creds.refresh(Request())
|
||||
|
||||
if not project_id:
|
||||
raise ValueError("Could not resolve project_id")
|
||||
|
@ -52,38 +443,272 @@ class VertexLLM(BaseLLM):
|
|||
f"Expected project_id to be a str but got {type(project_id)}"
|
||||
)
|
||||
|
||||
return credentials, project_id
|
||||
return creds, project_id
|
||||
|
||||
def refresh_auth(self, credentials: Any) -> None:
|
||||
from google.auth.transport.requests import Request # type: ignore[import-untyped]
|
||||
|
||||
credentials.refresh(Request())
|
||||
|
||||
def _prepare_request(self, request: httpx.Request) -> None:
|
||||
access_token = self._ensure_access_token()
|
||||
|
||||
if request.headers.get("Authorization"):
|
||||
# already authenticated, nothing for us to do
|
||||
return
|
||||
|
||||
request.headers["Authorization"] = f"Bearer {access_token}"
|
||||
|
||||
def _ensure_access_token(self) -> str:
|
||||
if self.access_token is not None:
|
||||
return self.access_token
|
||||
def _ensure_access_token(
|
||||
self, credentials: Optional[str], project_id: Optional[str]
|
||||
) -> Tuple[str, str]:
|
||||
"""
|
||||
Returns auth token and project id
|
||||
"""
|
||||
if self.access_token is not None and self.project_id is not None:
|
||||
return self.access_token, self.project_id
|
||||
|
||||
if not self._credentials:
|
||||
self._credentials, project_id = self.load_auth()
|
||||
self._credentials, project_id = self.load_auth(
|
||||
credentials=credentials, project_id=project_id
|
||||
)
|
||||
if not self.project_id:
|
||||
self.project_id = project_id
|
||||
else:
|
||||
self.refresh_auth(self._credentials)
|
||||
|
||||
if not self._credentials.token:
|
||||
if not self.project_id:
|
||||
self.project_id = self._credentials.project_id
|
||||
|
||||
if not self.project_id:
|
||||
raise ValueError("Could not resolve project_id")
|
||||
|
||||
if not self._credentials or not self._credentials.token:
|
||||
raise RuntimeError("Could not resolve API token from the environment")
|
||||
|
||||
assert isinstance(self._credentials.token, str)
|
||||
return self._credentials.token
|
||||
return self._credentials.token, self.project_id
|
||||
|
||||
async def async_streaming(
|
||||
self,
|
||||
model: str,
|
||||
messages: list,
|
||||
api_base: str,
|
||||
model_response: ModelResponse,
|
||||
print_verbose: Callable,
|
||||
data: str,
|
||||
timeout: Optional[Union[float, httpx.Timeout]],
|
||||
encoding,
|
||||
logging_obj,
|
||||
stream,
|
||||
optional_params: dict,
|
||||
litellm_params=None,
|
||||
logger_fn=None,
|
||||
headers={},
|
||||
client: Optional[AsyncHTTPHandler] = None,
|
||||
) -> CustomStreamWrapper:
|
||||
streaming_response = CustomStreamWrapper(
|
||||
completion_stream=None,
|
||||
make_call=partial(
|
||||
make_call,
|
||||
client=client,
|
||||
api_base=api_base,
|
||||
headers=headers,
|
||||
data=data,
|
||||
model=model,
|
||||
messages=messages,
|
||||
logging_obj=logging_obj,
|
||||
),
|
||||
model=model,
|
||||
custom_llm_provider="vertex_ai_beta",
|
||||
logging_obj=logging_obj,
|
||||
)
|
||||
return streaming_response
|
||||
|
||||
async def async_completion(
|
||||
self,
|
||||
model: str,
|
||||
messages: list,
|
||||
api_base: str,
|
||||
model_response: ModelResponse,
|
||||
print_verbose: Callable,
|
||||
data: str,
|
||||
timeout: Optional[Union[float, httpx.Timeout]],
|
||||
encoding,
|
||||
logging_obj,
|
||||
stream,
|
||||
optional_params: dict,
|
||||
litellm_params=None,
|
||||
logger_fn=None,
|
||||
headers={},
|
||||
client: Optional[AsyncHTTPHandler] = None,
|
||||
) -> Union[ModelResponse, CustomStreamWrapper]:
|
||||
if client is None:
|
||||
_params = {}
|
||||
if timeout is not None:
|
||||
if isinstance(timeout, float) or isinstance(timeout, int):
|
||||
timeout = httpx.Timeout(timeout)
|
||||
_params["timeout"] = timeout
|
||||
client = AsyncHTTPHandler(**_params) # type: ignore
|
||||
else:
|
||||
client = client # type: ignore
|
||||
|
||||
try:
|
||||
response = await client.post(api_base, headers=headers, json=data) # type: ignore
|
||||
response.raise_for_status()
|
||||
except httpx.HTTPStatusError as err:
|
||||
error_code = err.response.status_code
|
||||
raise VertexAIError(status_code=error_code, message=err.response.text)
|
||||
except httpx.TimeoutException:
|
||||
raise VertexAIError(status_code=408, message="Timeout error occurred.")
|
||||
|
||||
return self._process_response(
|
||||
model=model,
|
||||
response=response,
|
||||
model_response=model_response,
|
||||
logging_obj=logging_obj,
|
||||
api_key="",
|
||||
data=data,
|
||||
messages=messages,
|
||||
print_verbose=print_verbose,
|
||||
optional_params=optional_params,
|
||||
encoding=encoding,
|
||||
)
|
||||
|
||||
def completion(
|
||||
self,
|
||||
model: str,
|
||||
messages: list,
|
||||
model_response: ModelResponse,
|
||||
print_verbose: Callable,
|
||||
encoding,
|
||||
logging_obj,
|
||||
optional_params: dict,
|
||||
acompletion: bool,
|
||||
timeout: Optional[Union[float, httpx.Timeout]],
|
||||
vertex_project: Optional[str],
|
||||
vertex_location: Optional[str],
|
||||
vertex_credentials: Optional[str],
|
||||
litellm_params=None,
|
||||
logger_fn=None,
|
||||
extra_headers: Optional[dict] = None,
|
||||
client: Optional[Union[AsyncHTTPHandler, HTTPHandler]] = None,
|
||||
) -> Union[ModelResponse, CustomStreamWrapper]:
|
||||
|
||||
auth_header, vertex_project = self._ensure_access_token(
|
||||
credentials=vertex_credentials, project_id=vertex_project
|
||||
)
|
||||
vertex_location = self.get_vertex_region(vertex_region=vertex_location)
|
||||
stream: Optional[bool] = optional_params.pop("stream", None) # type: ignore
|
||||
|
||||
### SET RUNTIME ENDPOINT ###
|
||||
url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:generateContent"
|
||||
|
||||
## TRANSFORMATION ##
|
||||
# Separate system prompt from rest of message
|
||||
system_prompt_indices = []
|
||||
system_content_blocks: List[PartType] = []
|
||||
for idx, message in enumerate(messages):
|
||||
if message["role"] == "system":
|
||||
_system_content_block = PartType(text=message["content"])
|
||||
system_content_blocks.append(_system_content_block)
|
||||
system_prompt_indices.append(idx)
|
||||
if len(system_prompt_indices) > 0:
|
||||
for idx in reversed(system_prompt_indices):
|
||||
messages.pop(idx)
|
||||
system_instructions = SystemInstructions(parts=system_content_blocks)
|
||||
content = _gemini_convert_messages_with_history(messages=messages)
|
||||
tools: Optional[Tools] = optional_params.pop("tools", None)
|
||||
tool_choice: Optional[ToolConfig] = optional_params.pop("tool_choice", None)
|
||||
generation_config: Optional[GenerationConfig] = GenerationConfig(
|
||||
**optional_params
|
||||
)
|
||||
data = RequestBody(system_instruction=system_instructions, contents=content)
|
||||
if tools is not None:
|
||||
data["tools"] = tools
|
||||
if tool_choice is not None:
|
||||
data["toolConfig"] = tool_choice
|
||||
if generation_config is not None:
|
||||
data["generationConfig"] = generation_config
|
||||
|
||||
headers = {
|
||||
"Content-Type": "application/json; charset=utf-8",
|
||||
"Authorization": f"Bearer {auth_header}",
|
||||
}
|
||||
|
||||
## LOGGING
|
||||
logging_obj.pre_call(
|
||||
input=messages,
|
||||
api_key="",
|
||||
additional_args={
|
||||
"complete_input_dict": data,
|
||||
"api_base": url,
|
||||
"headers": headers,
|
||||
},
|
||||
)
|
||||
|
||||
### ROUTING (ASYNC, STREAMING, SYNC)
|
||||
if acompletion:
|
||||
### ASYNC COMPLETION
|
||||
return self.async_completion(
|
||||
model=model,
|
||||
messages=messages,
|
||||
data=data, # type: ignore
|
||||
api_base=url,
|
||||
model_response=model_response,
|
||||
print_verbose=print_verbose,
|
||||
encoding=encoding,
|
||||
logging_obj=logging_obj,
|
||||
optional_params=optional_params,
|
||||
stream=stream,
|
||||
litellm_params=litellm_params,
|
||||
logger_fn=logger_fn,
|
||||
headers=headers,
|
||||
timeout=timeout,
|
||||
client=client, # type: ignore
|
||||
)
|
||||
|
||||
## SYNC STREAMING CALL ##
|
||||
if stream is not None and stream is True:
|
||||
streaming_response = CustomStreamWrapper(
|
||||
completion_stream=None,
|
||||
make_call=partial(
|
||||
make_sync_call,
|
||||
client=None,
|
||||
api_base=url,
|
||||
headers=headers, # type: ignore
|
||||
data=json.dumps(data),
|
||||
model=model,
|
||||
messages=messages,
|
||||
logging_obj=logging_obj,
|
||||
),
|
||||
model=model,
|
||||
custom_llm_provider="vertex_ai_beta",
|
||||
logging_obj=logging_obj,
|
||||
)
|
||||
|
||||
return streaming_response
|
||||
## COMPLETION CALL ##
|
||||
if client is None or isinstance(client, AsyncHTTPHandler):
|
||||
_params = {}
|
||||
if timeout is not None:
|
||||
if isinstance(timeout, float) or isinstance(timeout, int):
|
||||
timeout = httpx.Timeout(timeout)
|
||||
_params["timeout"] = timeout
|
||||
client = HTTPHandler(**_params) # type: ignore
|
||||
else:
|
||||
client = client
|
||||
try:
|
||||
response = client.post(url=url, headers=headers, json=data) # type: ignore
|
||||
response.raise_for_status()
|
||||
except httpx.HTTPStatusError as err:
|
||||
error_code = err.response.status_code
|
||||
raise VertexAIError(status_code=error_code, message=response.text)
|
||||
except httpx.TimeoutException:
|
||||
raise VertexAIError(status_code=408, message="Timeout error occurred.")
|
||||
|
||||
return self._process_response(
|
||||
model=model,
|
||||
response=response,
|
||||
model_response=model_response,
|
||||
logging_obj=logging_obj,
|
||||
optional_params=optional_params,
|
||||
api_key="",
|
||||
data=data, # type: ignore
|
||||
messages=messages,
|
||||
print_verbose=print_verbose,
|
||||
encoding=encoding,
|
||||
)
|
||||
|
||||
def image_generation(
|
||||
self,
|
||||
|
@ -163,7 +788,7 @@ class VertexLLM(BaseLLM):
|
|||
} \
|
||||
"https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict"
|
||||
"""
|
||||
auth_header = self._ensure_access_token()
|
||||
auth_header, _ = self._ensure_access_token(credentials=None, project_id=None)
|
||||
optional_params = optional_params or {
|
||||
"sampleCount": 1
|
||||
} # default optional params
|
||||
|
@ -222,3 +847,84 @@ class VertexLLM(BaseLLM):
|
|||
model_response.data = _response_data
|
||||
|
||||
return model_response
|
||||
|
||||
|
||||
class ModelResponseIterator:
|
||||
def __init__(self, streaming_response):
|
||||
self.streaming_response = streaming_response
|
||||
self.response_iterator = iter(self.streaming_response)
|
||||
|
||||
def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
|
||||
try:
|
||||
processed_chunk = GenerateContentResponseBody(**chunk) # type: ignore
|
||||
text = ""
|
||||
tool_use: Optional[ChatCompletionToolCallChunk] = None
|
||||
is_finished = False
|
||||
finish_reason = ""
|
||||
usage: Optional[ChatCompletionUsageBlock] = None
|
||||
|
||||
gemini_chunk = processed_chunk["candidates"][0]
|
||||
|
||||
if (
|
||||
"content" in gemini_chunk
|
||||
and "text" in gemini_chunk["content"]["parts"][0]
|
||||
):
|
||||
text = gemini_chunk["content"]["parts"][0]["text"]
|
||||
|
||||
if "finishReason" in gemini_chunk:
|
||||
finish_reason = map_finish_reason(
|
||||
finish_reason=gemini_chunk["finishReason"]
|
||||
)
|
||||
is_finished = True
|
||||
|
||||
if "usageMetadata" in processed_chunk:
|
||||
usage = ChatCompletionUsageBlock(
|
||||
prompt_tokens=processed_chunk["usageMetadata"]["promptTokenCount"],
|
||||
completion_tokens=processed_chunk["usageMetadata"][
|
||||
"candidatesTokenCount"
|
||||
],
|
||||
total_tokens=processed_chunk["usageMetadata"]["totalTokenCount"],
|
||||
)
|
||||
|
||||
returned_chunk = GenericStreamingChunk(
|
||||
text=text,
|
||||
tool_use=tool_use,
|
||||
is_finished=is_finished,
|
||||
finish_reason=finish_reason,
|
||||
usage=usage,
|
||||
index=0,
|
||||
)
|
||||
return returned_chunk
|
||||
except json.JSONDecodeError:
|
||||
raise ValueError(f"Failed to decode JSON from chunk: {chunk}")
|
||||
|
||||
# Sync iterator
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
try:
|
||||
chunk = next(self.response_iterator)
|
||||
chunk = chunk.decode()
|
||||
json_chunk = json.loads(chunk)
|
||||
return self.chunk_parser(chunk=json_chunk)
|
||||
except StopIteration:
|
||||
raise StopIteration
|
||||
except ValueError as e:
|
||||
raise RuntimeError(f"Error parsing chunk: {e}")
|
||||
|
||||
# Async iterator
|
||||
def __aiter__(self):
|
||||
self.async_response_iterator = self.streaming_response.__aiter__()
|
||||
return self
|
||||
|
||||
async def __anext__(self):
|
||||
try:
|
||||
chunk = await self.async_response_iterator.__anext__()
|
||||
chunk = chunk.decode()
|
||||
json_chunk = json.loads(chunk)
|
||||
return self.chunk_parser(chunk=json_chunk)
|
||||
except StopAsyncIteration:
|
||||
raise StopAsyncIteration
|
||||
except ValueError as e:
|
||||
raise RuntimeError(f"Error parsing chunk: {e}")
|
||||
|
|
|
@@ -329,12 +329,14 @@ async def acompletion(
        or custom_llm_provider == "ollama_chat"
        or custom_llm_provider == "replicate"
        or custom_llm_provider == "vertex_ai"
        or custom_llm_provider == "vertex_ai_beta"
        or custom_llm_provider == "gemini"
        or custom_llm_provider == "sagemaker"
        or custom_llm_provider == "anthropic"
        or custom_llm_provider == "predibase"
        or custom_llm_provider == "bedrock"
        or custom_llm_provider == "databricks"
        or custom_llm_provider == "clarifai"
        or custom_llm_provider in litellm.openai_compatible_providers
    ):  # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all.
        init_response = await loop.run_in_executor(None, func_with_context)

@@ -1875,6 +1877,42 @@ def completion(
            )
            return response
        response = model_response
    elif custom_llm_provider == "vertex_ai_beta":
        vertex_ai_project = (
            optional_params.pop("vertex_project", None)
            or optional_params.pop("vertex_ai_project", None)
            or litellm.vertex_project
            or get_secret("VERTEXAI_PROJECT")
        )
        vertex_ai_location = (
            optional_params.pop("vertex_location", None)
            or optional_params.pop("vertex_ai_location", None)
            or litellm.vertex_location
            or get_secret("VERTEXAI_LOCATION")
        )
        vertex_credentials = (
            optional_params.pop("vertex_credentials", None)
            or optional_params.pop("vertex_ai_credentials", None)
            or get_secret("VERTEXAI_CREDENTIALS")
        )
        new_params = deepcopy(optional_params)
        response = vertex_chat_completion.completion(  # type: ignore
            model=model,
            messages=messages,
            model_response=model_response,
            print_verbose=print_verbose,
            optional_params=new_params,
            litellm_params=litellm_params,
            logger_fn=logger_fn,
            encoding=encoding,
            vertex_location=vertex_ai_location,
            vertex_project=vertex_ai_project,
            vertex_credentials=vertex_credentials,
            logging_obj=logging,
            acompletion=acompletion,
            timeout=timeout,
        )

    elif custom_llm_provider == "vertex_ai":
        vertex_ai_project = (
            optional_params.pop("vertex_project", None)

@@ -1893,6 +1931,7 @@ def completion(
            or optional_params.pop("vertex_ai_credentials", None)
            or get_secret("VERTEXAI_CREDENTIALS")
        )

        new_params = deepcopy(optional_params)
        if "claude-3" in model:
            model_response = vertex_ai_anthropic.completion(
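A hedged sketch of how a request reaches the new branch above (the model id is illustrative; project/location can also come from the `VERTEXAI_PROJECT`/`VERTEXAI_LOCATION` secrets read above):

```python
import litellm

# Routed via custom_llm_provider == "vertex_ai_beta", i.e. the httpx-based Vertex client.
response = litellm.completion(
    model="vertex_ai_beta/gemini-1.5-pro-preview-0514",  # illustrative model id
    messages=[{"role": "user", "content": "Hello from the httpx Vertex client"}],
    vertex_project="my-gcp-project",
    vertex_location="us-central1",
)
print(response.choices[0].message.content)
```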
@@ -3347,6 +3347,24 @@
        "litellm_provider": "deepinfra",
        "mode": "chat"
    },
    "deepinfra/meta-llama/Meta-Llama-3-8B-Instruct": {
        "max_tokens": 8191,
        "max_input_tokens": 8191,
        "max_output_tokens": 4096,
        "input_cost_per_token": 0.00000008,
        "output_cost_per_token": 0.00000008,
        "litellm_provider": "deepinfra",
        "mode": "chat"
    },
    "deepinfra/meta-llama/Meta-Llama-3-70B-Instruct": {
        "max_tokens": 8191,
        "max_input_tokens": 8191,
        "max_output_tokens": 4096,
        "input_cost_per_token": 0.00000059,
        "output_cost_per_token": 0.00000079,
        "litellm_provider": "deepinfra",
        "mode": "chat"
    },
    "deepinfra/01-ai/Yi-34B-200K": {
        "max_tokens": 4096,
        "max_input_tokens": 200000,
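As a quick sanity check on the new entries, request cost is just tokens multiplied by the per-token prices above:

```python
# Worked example for deepinfra/meta-llama/Meta-Llama-3-8B-Instruct
# ($0.00000008 per input token and per output token, per the JSON above).
prompt_tokens, completion_tokens = 1_000, 500
cost = prompt_tokens * 0.00000008 + completion_tokens * 0.00000008
print(f"${cost:.6f}")  # -> $0.000120
```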
[The remaining diffs in this commit are regenerated Admin UI build artifacts under litellm/proxy/_experimental/out/ — a new 404.html plus re-hashed minified webpack and CSS chunks; several of these diffs are suppressed as too long.]
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[45980,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-17b0c91edd3a24fe.js\",\"931\",\"static/chunks/app/page-bd882aee817406ff.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"48nWsJi-LJrUlOLzcK-Yz\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-887c75b16b85d4b4.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f593049e31b05aeb.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-8316d07d1f41e39f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-887c75b16b85d4b4.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/63f65dbb14efd996.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[68101,[\"936\",\"static/chunks/2f6dbc85-cac2949a76539886.js\",\"505\",\"static/chunks/505-5ff3c318fddfa35c.js\",\"131\",\"static/chunks/131-cb6bfe24e23e121b.js\",\"684\",\"static/chunks/684-16b194c83a169f6d.js\",\"759\",\"static/chunks/759-c0083d8a782d300e.js\",\"777\",\"static/chunks/777-71fb78fdb4897cc3.js\",\"931\",\"static/chunks/app/page-8028473f1a04553d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/63f65dbb14efd996.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"sTvd1VbHSi_TBr1KiIpul\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[45980,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-17b0c91edd3a24fe.js","931","static/chunks/app/page-bd882aee817406ff.js"],""]
|
||||
3:I[68101,["936","static/chunks/2f6dbc85-cac2949a76539886.js","505","static/chunks/505-5ff3c318fddfa35c.js","131","static/chunks/131-cb6bfe24e23e121b.js","684","static/chunks/684-16b194c83a169f6d.js","759","static/chunks/759-c0083d8a782d300e.js","777","static/chunks/777-71fb78fdb4897cc3.js","931","static/chunks/app/page-8028473f1a04553d.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
litellm/proxy/_experimental/out/model_hub.html (new file; diff suppressed because one or more lines are too long)
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-17b0c91edd3a24fe.js","418","static/chunks/app/model_hub/page-4cb65c32467214b5.js"],""]
|
||||
3:I[87494,["505","static/chunks/505-5ff3c318fddfa35c.js","131","static/chunks/131-cb6bfe24e23e121b.js","777","static/chunks/777-71fb78fdb4897cc3.js","418","static/chunks/app/model_hub/page-a1942d43573c82c3.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
litellm/proxy/_experimental/out/onboarding.html (new file; diff suppressed because one or more lines are too long)
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-17b0c91edd3a24fe.js","461","static/chunks/app/onboarding/page-664c7288e11fff5a.js"],""]
|
||||
3:I[667,["665","static/chunks/3014691f-b24e8254c7593934.js","505","static/chunks/505-5ff3c318fddfa35c.js","684","static/chunks/684-16b194c83a169f6d.js","777","static/chunks/777-71fb78fdb4897cc3.js","461","static/chunks/app/onboarding/page-49a30e653b6ae929.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["sTvd1VbHSi_TBr1KiIpul",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/63f65dbb14efd996.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -11,7 +11,7 @@ model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: predibase/llama-3-8b-instruct
-     api_base: "http://0.0.0.0:8081"
+     api_base: "http://0.0.0.0:8000"
      api_key: os.environ/PREDIBASE_API_KEY
      tenant_id: os.environ/PREDIBASE_TENANT_ID
      max_retries: 0
@ -55,7 +55,16 @@ model_list:
      model: textembedding-gecko-multilingual@001
      vertex_project: my-project-9d5c
      vertex_location: us-central1
  - model_name: lbl/command-r-plus
    litellm_params:
      model: openai/lbl/command-r-plus
      api_key: "os.environ/VLLM_API_KEY"
      api_base: http://vllm-command:8000/v1
      rpm: 1000
      input_cost_per_token: 0
      output_cost_per_token: 0
    model_info:
      max_input_tokens: 80920
assistant_settings:
  custom_llm_provider: openai
  litellm_params:
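For a quick sanity check of the new `lbl/command-r-plus` entry, the model can be called through the proxy with the standard OpenAI client. This is a minimal sketch; the proxy address and key below are placeholders for whatever the deployment actually uses.

```python
# Placeholder proxy URL and key; model name matches the config entry above.
import openai

client = openai.OpenAI(
    api_key="sk-1234",               # proxy virtual key / master key (placeholder)
    base_url="http://0.0.0.0:4000",  # LiteLLM proxy address (placeholder)
)

response = client.chat.completions.create(
    model="lbl/command-r-plus",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response.choices[0].message.content)
```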
@ -224,6 +224,7 @@ class LiteLLMRoutes(enum.Enum):
        "/key/delete",
        "/global/spend/logs",
        "/global/predict/spend/logs",
        "/sso/get/logout_url",
    ]

    management_routes: List = [  # key
@ -32,7 +32,7 @@ def management_endpoint_wrapper(func):

        if open_telemetry_logger is not None:
            _http_request: Request = kwargs.get("http_request")

            if _http_request:
                _route = _http_request.url.path
                _request_body: dict = await _read_request_body(
                    request=_http_request
@ -67,6 +67,7 @@ def management_endpoint_wrapper(func):

        if open_telemetry_logger is not None:
            _http_request: Request = kwargs.get("http_request")
            if _http_request:
                _route = _http_request.url.path
                _request_body: dict = await _read_request_body(
                    request=_http_request
@ -140,7 +140,7 @@ class _PROXY_AzureContentSafety(
                response.choices[0], litellm.utils.Choices
            ):
                await self.test_violation(
-                   content=response.choices[0].message.content, source="output"
+                   content=response.choices[0].message.content or "", source="output"
                )

    # async def async_post_call_streaming_hook(
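For context on the `or ""` change above: `message.content` can be `None` (for example, a response that contains only tool calls), while the content-safety check expects a string. A tiny self-contained sketch of the guard:

```python
# Toy stand-in for response.choices[0].message in a tool-call-only response.
class _Message:
    content = None

message = _Message()
content = message.content or ""  # the guard added in the hunk above
assert content == ""             # safe to pass to a text-analysis client, never None
```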
@ -79,10 +79,6 @@ async def add_litellm_data_to_request(
            data["cache"][k] = v

    verbose_proxy_logger.debug("receiving data: %s", data)
    # users can pass in 'user' param to /chat/completions. Don't override it
    if data.get("user", None) is None and user_api_key_dict.user_id is not None:
        # if users are using user_api_key_auth, set `user` in `data`
        data["user"] = user_api_key_dict.user_id

    if "metadata" not in data:
        data["metadata"] = {}
@ -108,6 +104,15 @@ async def add_litellm_data_to_request(
    data["metadata"]["user_api_key_team_alias"] = getattr(
        user_api_key_dict, "team_alias", None
    )

    # Team spend, budget - used by prometheus.py
    data["metadata"]["user_api_key_team_max_budget"] = user_api_key_dict.team_max_budget
    data["metadata"]["user_api_key_team_spend"] = user_api_key_dict.team_spend

    # API Key spend, budget - used by prometheus.py
    data["metadata"]["user_api_key_spend"] = user_api_key_dict.spend
    data["metadata"]["user_api_key_max_budget"] = user_api_key_dict.max_budget

    data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata
    _headers = dict(request.headers)
    _headers.pop(
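The new metadata keys above (team and key spend plus budgets) are what a metrics callback can read at log time. Below is a minimal sketch, assuming the standard `CustomLogger` interface and that request metadata arrives under `kwargs["litellm_params"]["metadata"]`; the logger class and gauge comment are illustrative, only the field names come from the hunk above.

```python
# Illustrative consumer of the metadata fields added above; gauge names are made up.
from litellm.integrations.custom_logger import CustomLogger


class BudgetMetricsLogger(CustomLogger):
    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        metadata = kwargs.get("litellm_params", {}).get("metadata", {}) or {}
        team_spend = metadata.get("user_api_key_team_spend")
        team_max_budget = metadata.get("user_api_key_team_max_budget")
        key_spend = metadata.get("user_api_key_spend")
        key_max_budget = metadata.get("user_api_key_max_budget")
        # A real implementation would set Prometheus gauges here, e.g.
        # REMAINING_TEAM_BUDGET.labels(...).set(team_max_budget - team_spend)
        print(team_spend, team_max_budget, key_spend, key_max_budget)
```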
@ -14,17 +14,18 @@ model_list:
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY
-  - model_name: my-triton-model
+  - model_name: mistral-embed
    litellm_params:
-      model: triton/any"
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/triton/embeddings
+      model: mistral/mistral-embed

general_settings:
  master_key: sk-1234

litellm_settings:
  callbacks: ["otel"]
  success_callback: ["prometheus"]
  failure_callback: ["prometheus"]
  store_audit_logs: true
  turn_off_message_logging: true
  redact_messages_in_exceptions: True
  enforced_params:
    - user
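To exercise the new `mistral-embed` entry, the proxy's OpenAI-compatible embeddings route can be hit with the standard client. A minimal sketch; the proxy URL and key are placeholders.

```python
# Placeholder proxy URL/key; "mistral-embed" matches the model_name added above.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.embeddings.create(
    model="mistral-embed",
    input=["hello from litellm"],
)
print(len(response.data[0].embedding))  # dimensionality of the returned vector
```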
@ -879,6 +879,7 @@ async def user_api_key_auth(

        ## check for cache hit (In-Memory Cache)
        original_api_key = api_key  # (Patch: For DynamoDB Backwards Compatibility)
+       _user_role = None
        if api_key.startswith("sk-"):
            api_key = hash_token(token=api_key)
        valid_token: Optional[UserAPIKeyAuth] = user_api_key_cache.get_cache(  # type: ignore
@ -1512,7 +1513,7 @@
            ):
                return UserAPIKeyAuth(
                    api_key=api_key,
-                   user_role="app_owner",
+                   user_role=_user_role,
                    parent_otel_span=parent_otel_span,
                    **valid_token_dict,
                )
@ -6649,7 +6650,7 @@ async def generate_key_fn(

    # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
    if litellm.store_audit_logs is True:
-       _updated_values = json.dumps(response)
+       _updated_values = json.dumps(response, default=str)
        asyncio.create_task(
            create_audit_log_for_update(
                request_data=LiteLLM_AuditLogs(
@ -6754,10 +6755,10 @@ async def update_key_fn(

    # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
    if litellm.store_audit_logs is True:
-       _updated_values = json.dumps(data_json)
+       _updated_values = json.dumps(data_json, default=str)

        _before_value = existing_key_row.json(exclude_none=True)
-       _before_value = json.dumps(_before_value)
+       _before_value = json.dumps(_before_value, default=str)

        asyncio.create_task(
            create_audit_log_for_update(
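The `default=str` additions above matter because the audit payloads can contain values the JSON encoder does not handle, such as `datetime` objects. A quick self-contained illustration:

```python
import json
from datetime import datetime

payload = {"token": "hashed-key", "created_at": datetime(2024, 6, 1, 12, 0, 0)}

# json.dumps(payload) would raise:
#   TypeError: Object of type datetime is not JSON serializable
safe = json.dumps(payload, default=str)  # fall back to str() for unknown types
print(safe)  # {"token": "hashed-key", "created_at": "2024-06-01 12:00:00"}
```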
@ -6853,7 +6854,7 @@ async def delete_key_fn(
        )

        key_row = key_row.json(exclude_none=True)
-       _key_row = json.dumps(key_row)
+       _key_row = json.dumps(key_row, default=str)

        asyncio.create_task(
            create_audit_log_for_update(
@ -7057,6 +7058,7 @@ async def info_key_fn(
|
|||
"/spend/keys",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def spend_key_fn():
|
||||
"""
|
||||
|
@ -7089,6 +7091,7 @@ async def spend_key_fn():
|
|||
"/spend/users",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def spend_user_fn(
|
||||
user_id: Optional[str] = fastapi.Query(
|
||||
|
@ -7219,6 +7222,7 @@ async def view_spend_tags(
|
|||
responses={
|
||||
200: {"model": List[LiteLLM_SpendLogs]},
|
||||
},
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def get_global_activity(
|
||||
start_date: Optional[str] = fastapi.Query(
|
||||
|
@ -7322,6 +7326,7 @@ async def get_global_activity(
|
|||
responses={
|
||||
200: {"model": List[LiteLLM_SpendLogs]},
|
||||
},
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def get_global_activity_model(
|
||||
start_date: Optional[str] = fastapi.Query(
|
||||
|
@ -7468,6 +7473,7 @@ async def get_global_activity_model(
|
|||
responses={
|
||||
200: {"model": List[LiteLLM_SpendLogs]},
|
||||
},
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def get_global_activity_exceptions_per_deployment(
|
||||
model_group: str = fastapi.Query(
|
||||
|
@ -7620,6 +7626,7 @@ async def get_global_activity_exceptions_per_deployment(
|
|||
responses={
|
||||
200: {"model": List[LiteLLM_SpendLogs]},
|
||||
},
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def get_global_activity_exceptions(
|
||||
model_group: str = fastapi.Query(
|
||||
|
@ -7830,7 +7837,6 @@ async def get_global_spend_provider(
|
|||
"/global/spend/report",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
responses={
|
||||
200: {"model": List[LiteLLM_SpendLogs]},
|
||||
},
|
||||
|
@ -8530,6 +8536,7 @@ async def global_spend_reset():
|
|||
"/global/spend/logs",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def global_spend_logs(
|
||||
api_key: str = fastapi.Query(
|
||||
|
@ -8575,6 +8582,7 @@ async def global_spend_logs(
|
|||
"/global/spend",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def global_spend():
|
||||
"""
|
||||
|
@ -8601,6 +8609,7 @@ async def global_spend():
|
|||
"/global/spend/keys",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def global_spend_keys(
|
||||
limit: int = fastapi.Query(
|
||||
|
@ -8628,6 +8637,7 @@ async def global_spend_keys(
|
|||
"/global/spend/teams",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def global_spend_per_team():
|
||||
"""
|
||||
|
@ -8752,6 +8762,7 @@ async def global_view_all_end_users():
|
|||
"/global/spend/end_users",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def global_spend_end_users(data: Optional[GlobalEndUsersSpend] = None):
|
||||
"""
|
||||
|
@ -8804,6 +8815,7 @@ LIMIT 100
|
|||
"/global/spend/models",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def global_spend_models(
|
||||
limit: int = fastapi.Query(
|
||||
|
@ -8832,6 +8844,7 @@ async def global_spend_models(
|
|||
"/global/predict/spend/logs",
|
||||
tags=["Budget & Spend Tracking"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def global_predict_spend_logs(request: Request):
|
||||
from enterprise.utils import _forecast_daily_cost
|
||||
|
@ -8863,7 +8876,7 @@ async def new_user(data: NewUserRequest):
    - organization_id: Optional[str] - specify the org a user belongs to.
    - user_email: Optional[str] - Specify a user email.
    - send_invite_email: Optional[bool] - Specify if an invite email should be sent.
-   - user_role: Optional[str] - Specify a user role - "admin", "app_owner", "app_user"
+   - user_role: Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/litellm/proxy/_types.py#L20`
    - max_budget: Optional[float] - Specify max budget for a given user.
    - models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
    - tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute)
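A small sketch of creating a user with one of the role names documented above, using plain HTTP against the `/user/new` endpoint. The proxy address and master key are placeholders; only the parameter names come from the docstring.

```python
# Placeholder proxy URL and master key; params mirror the docstring above.
import requests

resp = requests.post(
    "http://0.0.0.0:4000/user/new",
    headers={"Authorization": "Bearer sk-1234", "Content-Type": "application/json"},
    json={
        "user_email": "dev@example.com",
        "user_role": "internal_user",
        "max_budget": 10,
    },
)
print(resp.json())
```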
@ -9947,16 +9960,18 @@ async def new_team(
|
|||
"""
|
||||
Allow users to create a new team. Apply user permissions to their team.
|
||||
|
||||
[ASK FOR HELP](https://github.com/BerriAI/litellm/issues)
|
||||
👉 [Detailed Doc on setting team budgets](https://docs.litellm.ai/docs/proxy/team_budgets)
|
||||
|
||||
|
||||
Parameters:
|
||||
- team_alias: Optional[str] - User defined team alias
|
||||
- team_id: Optional[str] - The team id of the user. If none passed, we'll generate it.
|
||||
- members_with_roles: List[{"role": "admin" or "user", "user_id": "<user-id>"}] - A list of users and their roles in the team. Get user_id when making a new user via `/user/new`.
|
||||
- metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
|
||||
- metadata: Optional[dict] - Metadata for team, store information for team. Example metadata = {"extra_info": "some info"}
|
||||
- tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
|
||||
- rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
|
||||
- max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
|
||||
- budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
|
||||
- models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
|
||||
- blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.
|
||||
|
||||
|
@ -9981,6 +9996,21 @@ async def new_team(
|
|||
{"role": "user", "user_id": "user-2434"}]
|
||||
}'
|
||||
|
||||
```
|
||||
|
||||
```
|
||||
curl --location 'http://0.0.0.0:4000/team/new' \
|
||||
|
||||
--header 'Authorization: Bearer sk-1234' \
|
||||
|
||||
--header 'Content-Type: application/json' \
|
||||
|
||||
--data '{
|
||||
"team_alias": "QA Prod Bot",
|
||||
"max_budget": 0.000000001,
|
||||
"budget_duration": "1d"
|
||||
}'
|
||||
|
||||
```
|
||||
"""
|
||||
global prisma_client
|
||||
|
@ -10110,7 +10140,8 @@ async def new_team(
|
|||
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
|
||||
if litellm.store_audit_logs is True:
|
||||
_updated_values = complete_team_data.json(exclude_none=True)
|
||||
_updated_values = json.dumps(_updated_values)
|
||||
|
||||
_updated_values = json.dumps(_updated_values, default=str)
|
||||
|
||||
asyncio.create_task(
|
||||
create_audit_log_for_update(
|
||||
|
@ -10174,6 +10205,7 @@ async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
|
|||
@management_endpoint_wrapper
|
||||
async def update_team(
|
||||
data: UpdateTeamRequest,
|
||||
http_request: Request,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
litellm_changed_by: Optional[str] = Header(
|
||||
None,
|
||||
|
@ -10192,6 +10224,7 @@ async def update_team(
|
|||
- tpm_limit: Optional[int] - The TPM (Tokens Per Minute) limit for this team - all keys with this team_id will have at max this TPM limit
|
||||
- rpm_limit: Optional[int] - The RPM (Requests Per Minute) limit for this team - all keys associated with this team_id will have at max this RPM limit
|
||||
- max_budget: Optional[float] - The maximum budget allocated to the team - all keys for this team_id will have at max this max_budget
|
||||
- budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
|
||||
- models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
|
||||
- blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.
|
||||
|
||||
|
@ -10209,6 +10242,20 @@ async def update_team(
|
|||
"tpm_limit": 100
|
||||
}'
|
||||
```
|
||||
|
||||
Example - Update Team `max_budget` budget
|
||||
```
|
||||
curl --location 'http://0.0.0.0:8000/team/update' \
|
||||
|
||||
--header 'Authorization: Bearer sk-1234' \
|
||||
|
||||
--header 'Content-Type: application/json' \
|
||||
|
||||
--data-raw '{
|
||||
"team_id": "litellm-test-client-id-new",
|
||||
"max_budget": 10
|
||||
}'
|
||||
```
|
||||
"""
|
||||
global prisma_client
|
||||
|
||||
|
@ -10248,8 +10295,8 @@
        # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
        if litellm.store_audit_logs is True:
            _before_value = existing_team_row.json(exclude_none=True)
-           _before_value = json.dumps(_before_value)
-           _after_value: str = json.dumps(updated_kv)
+           _before_value = json.dumps(_before_value, default=str)
+           _after_value: str = json.dumps(updated_kv, default=str)

            asyncio.create_task(
                create_audit_log_for_update(
@ -11408,7 +11455,7 @@
    for _model in all_models:
        # provided model_info in config.yaml
        model_info = _model.get("model_info", {})
-       if debug == True:
+       if debug is True:
            _openai_client = "None"
            if llm_router is not None:
                _openai_client = (
@ -11433,7 +11480,7 @@
        litellm_model = litellm_params.get("model", None)
        try:
            litellm_model_info = litellm.get_model_info(model=litellm_model)
-       except:
+       except Exception:
            litellm_model_info = {}
        # 3rd pass on the model, try seeing if we can find model but without the "/" in model cost map
        if litellm_model_info == {}:
@ -11444,8 +11491,10 @@
            if len(split_model) > 0:
                litellm_model = split_model[-1]
            try:
-               litellm_model_info = litellm.get_model_info(model=litellm_model)
-           except:
+               litellm_model_info = litellm.get_model_info(
+                   model=litellm_model, custom_llm_provider=split_model[0]
+               )
+           except Exception:
                litellm_model_info = {}
            for k, v in litellm_model_info.items():
                if k not in model_info:
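The `custom_llm_provider` fallback above can also be exercised directly. A minimal sketch; the model and provider names are chosen for illustration only, and `.get()` is used since available keys vary by model.

```python
import litellm

# Same second-pass pattern as above: pass the provider explicitly
# instead of relying on a "provider/model" prefix in the model string.
info = litellm.get_model_info(model="command-r-plus", custom_llm_provider="cohere")
print(info.get("max_input_tokens"), info.get("litellm_provider"))
```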
@ -11956,7 +12005,9 @@
        if len(split_model) > 0:
            litellm_model = split_model[-1]
        try:
-           litellm_model_info = litellm.get_model_info(model=litellm_model)
+           litellm_model_info = litellm.get_model_info(
+               model=litellm_model, custom_llm_provider=split_model[0]
+           )
        except:
            litellm_model_info = {}
        for k, v in litellm_model_info.items():
@ -12223,6 +12274,7 @@ async def alerting_settings(
|
|||
"/queue/chat/completions",
|
||||
tags=["experimental"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def async_queue_request(
|
||||
request: Request,
|
||||
|
@ -12334,18 +12386,10 @@ async def async_queue_request(
|
|||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/ollama_logs", dependencies=[Depends(user_api_key_auth)], tags=["experimental"]
|
||||
)
|
||||
async def retrieve_server_log(request: Request):
|
||||
filepath = os.path.expanduser("~/.ollama/logs/server.log")
|
||||
return FileResponse(filepath)
|
||||
|
||||
|
||||
#### LOGIN ENDPOINTS ####
|
||||
|
||||
|
||||
@app.get("/sso/key/generate", tags=["experimental"])
|
||||
@app.get("/sso/key/generate", tags=["experimental"], include_in_schema=False)
|
||||
async def google_login(request: Request):
|
||||
"""
|
||||
Create Proxy API Keys using Google Workspace SSO. Requires setting PROXY_BASE_URL in .env
|
||||
|
@ -12939,7 +12983,7 @@ def get_image():
|
|||
return FileResponse(logo_path, media_type="image/jpeg")
|
||||
|
||||
|
||||
@app.get("/sso/callback", tags=["experimental"])
|
||||
@app.get("/sso/callback", tags=["experimental"], include_in_schema=False)
|
||||
async def auth_callback(request: Request):
|
||||
"""Verify login"""
|
||||
global general_settings, ui_access_mode, premium_user
|
||||
|
@ -13244,6 +13288,7 @@ async def auth_callback(request: Request):
|
|||
tags=["Invite Links"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=InvitationModel,
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def new_invitation(
|
||||
data: InvitationNew, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth)
|
||||
|
@ -13308,6 +13353,7 @@ async def new_invitation(
|
|||
tags=["Invite Links"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=InvitationModel,
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def invitation_info(
|
||||
invitation_id: str, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth)
|
||||
|
@ -13359,6 +13405,7 @@ async def invitation_info(
|
|||
tags=["Invite Links"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=InvitationModel,
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def invitation_update(
|
||||
data: InvitationUpdate,
|
||||
|
@ -13419,6 +13466,7 @@ async def invitation_update(
|
|||
tags=["Invite Links"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=InvitationModel,
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def invitation_delete(
|
||||
data: InvitationDelete,
|
||||
|
@ -13471,6 +13519,7 @@ async def invitation_delete(
|
|||
"/config/update",
|
||||
tags=["config.yaml"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def update_config(config_info: ConfigYAML):
|
||||
"""
|
||||
|
@ -13628,6 +13677,7 @@ Keep it more precise, to prevent overwrite other values unintentially
|
|||
"/config/field/update",
|
||||
tags=["config.yaml"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def update_config_general_settings(
|
||||
data: ConfigFieldUpdate,
|
||||
|
@ -13706,6 +13756,7 @@ async def update_config_general_settings(
|
|||
tags=["config.yaml"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=ConfigFieldInfo,
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def get_config_general_settings(
|
||||
field_name: str,
|
||||
|
@ -13766,6 +13817,7 @@ async def get_config_general_settings(
|
|||
"/config/list",
|
||||
tags=["config.yaml"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def get_config_list(
|
||||
config_type: Literal["general_settings"],
|
||||
|
@ -13842,6 +13894,7 @@ async def get_config_list(
|
|||
"/config/field/delete",
|
||||
tags=["config.yaml"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def delete_config_general_settings(
|
||||
data: ConfigFieldDelete,
|
||||
|
@ -14097,6 +14150,7 @@ async def get_config():
|
|||
"/config/yaml",
|
||||
tags=["config.yaml"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def config_yaml_endpoint(config_info: ConfigYAML):
|
||||
"""
|
||||
|
@ -14743,6 +14797,22 @@ async def cache_flushall():
        )


@router.get(
    "/get/litellm_model_cost_map",
    include_in_schema=False,
    dependencies=[Depends(user_api_key_auth)],
)
async def get_litellm_model_cost_map():
    try:
        _model_cost_map = litellm.model_cost
        return _model_cost_map
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Internal Server Error ({str(e)})",
        )


@router.get("/", dependencies=[Depends(user_api_key_auth)])
async def home(request: Request):
    return "LiteLLM: RUNNING"
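The new `/get/litellm_model_cost_map` route above simply returns `litellm.model_cost`. A small usage sketch; the proxy address, key, and the model looked up are placeholders.

```python
# Placeholder proxy URL, key, and model name.
import requests

resp = requests.get(
    "http://0.0.0.0:4000/get/litellm_model_cost_map",
    headers={"Authorization": "Bearer sk-1234"},  # any key accepted by user_api_key_auth
)
cost_map = resp.json()
print(cost_map.get("gpt-4o", {}).get("input_cost_per_token"))
```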
@ -1,4 +1,4 @@
-from typing import Optional, List, Any, Literal, Union, TYPE_CHECKING
+from typing import Optional, List, Any, Literal, Union, TYPE_CHECKING, Tuple
import os
import subprocess
import hashlib
@ -2103,14 +2103,32 @@ def get_logging_payload(
        raise e


-def _duration_in_seconds(duration: str):
-    match = re.match(r"(\d+)([smhd]?)", duration)
+def _extract_from_regex(duration: str) -> Tuple[int, str]:
+    match = re.match(r"(\d+)(mo|[smhd]?)", duration)

    if not match:
        raise ValueError("Invalid duration format")

    value, unit = match.groups()
    value = int(value)

    return value, unit


def _duration_in_seconds(duration: str) -> int:
    """
    Parameters:
    - duration:
        - "<number>s" - seconds
        - "<number>m" - minutes
        - "<number>h" - hours
        - "<number>d" - days
        - "<number>mo" - months

    Returns time in seconds till when budget needs to be reset
    """
    value, unit = _extract_from_regex(duration=duration)

    if unit == "s":
        return value
    elif unit == "m":
@ -2119,6 +2137,22 @@ def _duration_in_seconds(duration: str):
        return value * 3600
    elif unit == "d":
        return value * 86400
    elif unit == "mo":
        now = time.time()
        current_time = datetime.fromtimestamp(now)

        # Calculate the first day of the next month
        if current_time.month == 12:
            next_month = datetime(year=current_time.year + 1, month=1, day=1)
        else:
            next_month = datetime(
                year=current_time.year, month=current_time.month + value, day=1
            )

        # Calculate the duration until the first day of the next month
        duration_until_next_month = next_month - current_time
        return int(duration_until_next_month.total_seconds())

    else:
        raise ValueError("Unsupported duration unit")
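Expected behaviour of the updated helpers above, as a small sketch. The import path is an assumption based on the surrounding hunks (these functions live in the same module as `get_logging_payload`); the asserted values follow directly from the arithmetic above.

```python
# Assumed import path, based on the file this hunk edits.
from litellm.proxy.utils import _duration_in_seconds, _extract_from_regex

assert _extract_from_regex("30s") == (30, "s")
assert _duration_in_seconds("30s") == 30
assert _duration_in_seconds("2h") == 2 * 3600
assert _duration_in_seconds("7d") == 7 * 86400
# "1mo" is dynamic: seconds until the first day of the next month.
print(_duration_in_seconds("1mo"))
```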
@ -3618,6 +3618,7 @@ class Router:
            except Exception:
                model_info = None
            # get llm provider
            model, llm_provider = "", ""
            try:
                model, llm_provider, _, _ = litellm.get_llm_provider(
                    model=litellm_params.model,
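For reference, `litellm.get_llm_provider` (unpacked above) returns a 4-tuple of model name, provider, dynamic API key, and API base. A minimal sketch, using a model string from the config earlier in this diff:

```python
import litellm

# Mirrors the unpacking in the Router hunk above.
model, provider, _dynamic_api_key, _api_base = litellm.get_llm_provider(
    model="mistral/mistral-embed"
)
print(model, provider)  # mistral-embed mistral
```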
@@ -503,13 +503,35 @@ async def test_async_vertexai_streaming_response():
# asyncio.run(test_async_vertexai_streaming_response())


def test_gemini_pro_vision():
@pytest.mark.parametrize("provider", ["vertex_ai"])  # "vertex_ai_beta"
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_gemini_pro_vision(provider, sync_mode):
    try:
        load_vertex_ai_credentials()
        litellm.set_verbose = True
        litellm.num_retries = 3
        if sync_mode:
            resp = litellm.completion(
                model="vertex_ai/gemini-1.5-flash-preview-0514",
                model="{}/gemini-1.5-flash-preview-0514".format(provider),
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": "Whats in this image?"},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": "gs://cloud-samples-data/generative-ai/image/boats.jpeg"
                                },
                            },
                        ],
                    }
                ],
            )
        else:
            resp = await litellm.acompletion(
                model="{}/gemini-1.5-flash-preview-0514".format(provider),
                messages=[
                    {
                        "role": "user",

@@ -532,6 +554,8 @@ def test_gemini_pro_vision():
        # DO NOT DELETE this ASSERT
        # Google counts the prompt tokens for us, we should ensure we use the tokens from the original response
        assert prompt_tokens == 263  # the gemini api returns 263 to us

        # assert False
    except litellm.RateLimitError as e:
        pass
    except Exception as e:
@ -591,9 +615,111 @@ def test_gemini_pro_vision_base64():
|
|||
pytest.fail(f"An exception occurred - {str(e)}")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
|
||||
@pytest.mark.parametrize("provider", ["vertex_ai_beta"]) # "vertex_ai",
|
||||
@pytest.mark.parametrize("sync_mode", [True]) # "vertex_ai",
|
||||
@pytest.mark.asyncio
|
||||
async def test_gemini_pro_function_calling(sync_mode):
|
||||
async def test_gemini_pro_function_calling_httpx(provider, sync_mode):
|
||||
try:
|
||||
load_vertex_ai_credentials()
|
||||
litellm.set_verbose = True
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Your name is Litellm Bot, you are a helpful assistant",
|
||||
},
|
||||
# User asks for their name and weather in San Francisco
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello, what is your name and can you tell me the weather?",
|
||||
},
|
||||
]
|
||||
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
}
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
data = {
|
||||
"model": "{}/gemini-1.5-pro".format(provider),
|
||||
"messages": messages,
|
||||
"tools": tools,
|
||||
"tool_choice": "required",
|
||||
}
|
||||
if sync_mode:
|
||||
response = litellm.completion(**data)
|
||||
else:
|
||||
response = await litellm.acompletion(**data)
|
||||
|
||||
print(f"response: {response}")
|
||||
|
||||
assert response.choices[0].message.tool_calls[0].function.arguments is not None
|
||||
assert isinstance(
|
||||
response.choices[0].message.tool_calls[0].function.arguments, str
|
||||
)
|
||||
except litellm.RateLimitError as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
if "429 Quota exceeded" in str(e):
|
||||
pass
|
||||
else:
|
||||
pytest.fail("An unexpected exception occurred - {}".format(str(e)))
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
|
||||
@pytest.mark.parametrize("provider", ["vertex_ai_beta"]) # "vertex_ai",
|
||||
@pytest.mark.asyncio
|
||||
async def test_gemini_pro_json_schema_httpx(provider):
|
||||
load_vertex_ai_credentials()
|
||||
litellm.set_verbose = True
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": """
|
||||
List 5 popular cookie recipes.
|
||||
|
||||
Using this JSON schema:
|
||||
|
||||
Recipe = {"recipe_name": str}
|
||||
|
||||
Return a `list[Recipe]`
|
||||
""",
|
||||
}
|
||||
]
|
||||
|
||||
response = completion(
|
||||
model="vertex_ai_beta/gemini-1.5-flash-preview-0514",
|
||||
messages=messages,
|
||||
response_format={"type": "json_object"},
|
||||
)
|
||||
|
||||
assert response.choices[0].message.content is not None
|
||||
response_json = json.loads(response.choices[0].message.content)
|
||||
|
||||
assert isinstance(response_json, dict) or isinstance(response_json, list)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
|
||||
@pytest.mark.parametrize("sync_mode", [True])
|
||||
@pytest.mark.parametrize("provider", ["vertex_ai"])
|
||||
@pytest.mark.asyncio
|
||||
async def test_gemini_pro_function_calling(provider, sync_mode):
|
||||
try:
|
||||
load_vertex_ai_credentials()
|
||||
litellm.set_verbose = True
|
||||
|
@ -655,7 +781,7 @@ async def test_gemini_pro_function_calling(sync_mode):
|
|||
]
|
||||
|
||||
data = {
|
||||
"model": "vertex_ai/gemini-1.5-pro-preview-0514",
|
||||
"model": "{}/gemini-1.5-pro-preview-0514".format(provider),
|
||||
"messages": messages,
|
||||
"tools": tools,
|
||||
}
|
||||
|
@ -810,14 +936,24 @@ def test_vertexai_embedding():
|
|||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
@pytest.mark.skip(
|
||||
reason="new test - works locally running into vertex version issues on ci/cd"
|
||||
)
|
||||
def test_vertexai_embedding_embedding_latest():
|
||||
try:
|
||||
load_vertex_ai_credentials()
|
||||
litellm.set_verbose = True
|
||||
|
||||
response = embedding(
|
||||
model="vertex_ai/text-embedding-004",
|
||||
input=["good morning from litellm", "this is another item"],
|
||||
input=["hi"],
|
||||
dimensions=1,
|
||||
auto_truncate=True,
|
||||
task_type="RETRIEVAL_QUERY",
|
||||
)
|
||||
|
||||
assert len(response.data[0]["embedding"]) == 1
|
||||
assert response.usage.prompt_tokens > 0
|
||||
print(f"response:", response)
|
||||
except litellm.RateLimitError as e:
|
||||
pass
|
||||
|
|
|
@ -220,13 +220,13 @@ def test_completion_bedrock_claude_sts_oidc_auth():
|
|||
aws_web_identity_token = "oidc/circleci_v2/"
|
||||
aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
|
||||
# TODO: This is using David's IAM role, we should use Litellm's IAM role eventually
|
||||
# TODO: This is using ai.moda's IAM role, we should use LiteLLM's IAM role eventually
|
||||
aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci"
|
||||
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
|
||||
response = completion(
|
||||
response_1 = completion(
|
||||
model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
|
||||
messages=messages,
|
||||
max_tokens=10,
|
||||
|
@ -236,8 +236,40 @@ def test_completion_bedrock_claude_sts_oidc_auth():
|
|||
aws_role_name=aws_role_name,
|
||||
aws_session_name="my-test-session",
|
||||
)
|
||||
# Add any assertions here to check the response
|
||||
print(response)
|
||||
print(response_1)
|
||||
assert len(response_1.choices) > 0
|
||||
assert len(response_1.choices[0].message.content) > 0
|
||||
|
||||
# This second call is to verify that the cache isn't breaking anything
|
||||
response_2 = completion(
|
||||
model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
|
||||
messages=messages,
|
||||
max_tokens=5,
|
||||
temperature=0.2,
|
||||
aws_region_name=aws_region_name,
|
||||
aws_web_identity_token=aws_web_identity_token,
|
||||
aws_role_name=aws_role_name,
|
||||
aws_session_name="my-test-session",
|
||||
)
|
||||
print(response_2)
|
||||
assert len(response_2.choices) > 0
|
||||
assert len(response_2.choices[0].message.content) > 0
|
||||
|
||||
# This third call is to verify that the cache isn't used for a different region
|
||||
response_3 = completion(
|
||||
model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
|
||||
messages=messages,
|
||||
max_tokens=6,
|
||||
temperature=0.3,
|
||||
aws_region_name="us-east-1",
|
||||
aws_web_identity_token=aws_web_identity_token,
|
||||
aws_role_name=aws_role_name,
|
||||
aws_session_name="my-test-session",
|
||||
)
|
||||
print(response_3)
|
||||
assert len(response_3.choices) > 0
|
||||
assert len(response_3.choices[0].message.content) > 0
|
||||
|
||||
except RateLimitError:
|
||||
pass
|
||||
except Exception as e:
|
||||
|
@ -255,7 +287,7 @@ def test_completion_bedrock_httpx_command_r_sts_oidc_auth():
|
|||
aws_web_identity_token = "oidc/circleci_v2/"
|
||||
aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
|
||||
# TODO: This is using David's IAM role, we should use Litellm's IAM role eventually
|
||||
# TODO: This is using ai.moda's IAM role, we should use LiteLLM's IAM role eventually
|
||||
aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci"
|
||||
|
||||
try:
|
||||
|
|
|
@@ -7,6 +7,7 @@ sys.path.insert(
)  # Adds the parent directory to the system path
import litellm
from litellm import get_model_info
import pytest


def test_get_model_info_simple_model_name():

@@ -23,3 +24,16 @@ def test_get_model_info_custom_llm_with_model_name():
    """
    model = "anthropic/claude-3-opus-20240229"
    litellm.get_model_info(model)


def test_get_model_info_custom_llm_with_same_name_vllm():
    """
    Tests if {custom_llm_provider}/{model_name} name given, and model exists in model info, the object is returned
    """
    model = "command-r-plus"
    provider = "openai"  # vllm is openai-compatible
    try:
        litellm.get_model_info(model, custom_llm_provider=provider)
        pytest.fail("Expected get model info to fail for an unmapped model/provider")
    except Exception:
        pass
@ -2217,6 +2217,7 @@ async def test_create_update_team(prisma_client):
|
|||
tpm_limit=30,
|
||||
rpm_limit=30,
|
||||
),
|
||||
http_request=Request(scope={"type": "http"}),
|
||||
user_api_key_dict=UserAPIKeyAuth(
|
||||
user_role=LitellmUserRoles.PROXY_ADMIN,
|
||||
api_key="sk-1234",
|
||||
|
|
|
@ -81,7 +81,7 @@ def test_async_fallbacks(caplog):
|
|||
# Define the expected log messages
|
||||
# - error request, falling back notice, success notice
|
||||
expected_logs = [
|
||||
"litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception AuthenticationError: OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m",
|
||||
"litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception litellm.AuthenticationError: AuthenticationError: OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m",
|
||||
"Falling back to model_group = azure/gpt-3.5-turbo",
|
||||
"litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",
|
||||
"Successful fallback b/w models.",
|
||||
|
|
|
@ -557,7 +557,13 @@ async def test_completion_predibase_streaming(sync_mode):
|
|||
print(f"complete_response: {complete_response}")
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except litellm.InternalServerError as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
print("ERROR class", e.__class__)
|
||||
print("ERROR message", e)
|
||||
print("ERROR traceback", traceback.format_exc())
|
||||
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
|
@ -1029,7 +1035,8 @@ def test_completion_claude_stream_bad_key():
|
|||
# test_completion_replicate_stream()
|
||||
|
||||
|
||||
def test_vertex_ai_stream():
|
||||
@pytest.mark.parametrize("provider", ["vertex_ai"]) # "vertex_ai_beta"
|
||||
def test_vertex_ai_stream(provider):
|
||||
from litellm.tests.test_amazing_vertex_completion import load_vertex_ai_credentials
|
||||
|
||||
load_vertex_ai_credentials()
|
||||
|
@ -1042,7 +1049,7 @@ def test_vertex_ai_stream():
|
|||
try:
|
||||
print("making request", model)
|
||||
response = completion(
|
||||
model=model,
|
||||
model="{}/{}".format(provider, model),
|
||||
messages=[
|
||||
{"role": "user", "content": "write 10 line code code for saying hi"}
|
||||
],
|
||||
|
|
|
@ -3,6 +3,7 @@ from unittest import mock
|
|||
|
||||
from dotenv import load_dotenv
|
||||
import copy
|
||||
from datetime import datetime
|
||||
|
||||
load_dotenv()
|
||||
import os
|
||||
|
@ -25,6 +26,7 @@ from litellm.utils import (
|
|||
get_max_tokens,
|
||||
get_supported_openai_params,
|
||||
)
|
||||
from litellm.proxy.utils import _duration_in_seconds, _extract_from_regex
|
||||
|
||||
# Assuming your trim_messages, shorten_message_to_fit_limit, and get_token_count functions are all in a module named 'message_utils'
|
||||
|
||||
|
@ -395,3 +397,89 @@ def test_get_supported_openai_params() -> None:
|
|||
|
||||
# Unmapped provider
|
||||
assert get_supported_openai_params("nonexistent") is None
|
||||
|
||||
|
||||
def test_redact_msgs_from_logs():
|
||||
"""
|
||||
Tests that turn_off_message_logging does not modify the response_obj
|
||||
|
||||
On the proxy some users were seeing the redaction impact client side responses
|
||||
"""
|
||||
from litellm.litellm_core_utils.redact_messages import (
|
||||
redact_message_input_output_from_logging,
|
||||
)
|
||||
from litellm.utils import Logging
|
||||
|
||||
litellm.turn_off_message_logging = True
|
||||
|
||||
response_obj = litellm.ModelResponse(
|
||||
choices=[
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"message": {
|
||||
"content": "I'm LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner.",
|
||||
"role": "assistant",
|
||||
},
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
_redacted_response_obj = redact_message_input_output_from_logging(
|
||||
result=response_obj,
|
||||
litellm_logging_obj=Logging(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
stream=False,
|
||||
call_type="acompletion",
|
||||
litellm_call_id="1234",
|
||||
start_time=datetime.now(),
|
||||
function_id="1234",
|
||||
),
|
||||
)
|
||||
|
||||
# Assert the response_obj content is NOT modified
|
||||
assert (
|
||||
response_obj.choices[0].message.content
|
||||
== "I'm LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner."
|
||||
)
|
||||
|
||||
litellm.turn_off_message_logging = False
|
||||
print("Test passed")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"duration, unit",
|
||||
[("7s", "s"), ("7m", "m"), ("7h", "h"), ("7d", "d"), ("7mo", "mo")],
|
||||
)
|
||||
def test_extract_from_regex(duration, unit):
|
||||
value, _unit = _extract_from_regex(duration=duration)
|
||||
|
||||
assert value == 7
|
||||
assert _unit == unit
|
||||
|
||||
|
||||
def test_duration_in_seconds():
|
||||
"""
|
||||
Test if duration int is correctly calculated for different str
|
||||
"""
|
||||
import time
|
||||
|
||||
now = time.time()
|
||||
current_time = datetime.fromtimestamp(now)
|
||||
print("current_time={}".format(current_time))
|
||||
# Calculate the first day of the next month
|
||||
if current_time.month == 12:
|
||||
next_month = datetime(year=current_time.year + 1, month=1, day=1)
|
||||
else:
|
||||
next_month = datetime(
|
||||
year=current_time.year, month=current_time.month + 1, day=1
|
||||
)
|
||||
print("next_month={}".format(next_month))
|
||||
# Calculate the duration until the first day of the next month
|
||||
duration_until_next_month = next_month - current_time
|
||||
expected_duration = int(duration_until_next_month.total_seconds())
|
||||
|
||||
value = _duration_in_seconds(duration="1mo")
|
||||
|
||||
assert value - expected_duration < 2
|
||||
|
|
|
@@ -323,3 +323,9 @@ class ChatCompletionResponseMessage(TypedDict, total=False):
    content: Optional[str]
    tool_calls: List[ChatCompletionToolCallChunk]
    role: Literal["assistant"]


class ChatCompletionUsageBlock(TypedDict):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
@ -9,6 +9,7 @@ from typing_extensions import (
|
|||
runtime_checkable,
|
||||
Required,
|
||||
)
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class Field(TypedDict):
|
||||
|
@ -48,6 +49,190 @@ class PartType(TypedDict, total=False):
|
|||
function_response: FunctionResponse
|
||||
|
||||
|
||||
class HttpxFunctionCall(TypedDict):
|
||||
name: str
|
||||
args: dict
|
||||
|
||||
|
||||
class HttpxPartType(TypedDict, total=False):
|
||||
text: str
|
||||
inline_data: BlobType
|
||||
file_data: FileDataType
|
||||
functionCall: HttpxFunctionCall
|
||||
function_response: FunctionResponse
|
||||
|
||||
|
||||
class HttpxContentType(TypedDict, total=False):
|
||||
role: Literal["user", "model"]
|
||||
parts: Required[List[HttpxPartType]]
|
||||
|
||||
|
||||
class ContentType(TypedDict, total=False):
|
||||
role: Literal["user", "model"]
|
||||
parts: Required[List[PartType]]
|
||||
|
||||
|
||||
class SystemInstructions(TypedDict):
|
||||
parts: Required[List[PartType]]
|
||||
|
||||
|
||||
class Schema(TypedDict, total=False):
|
||||
type: Literal["STRING", "INTEGER", "BOOLEAN", "NUMBER", "ARRAY", "OBJECT"]
|
||||
description: str
|
||||
enum: List[str]
|
||||
items: List["Schema"]
|
||||
properties: "Schema"
|
||||
required: List[str]
|
||||
nullable: bool
|
||||
|
||||
|
||||
class FunctionDeclaration(TypedDict, total=False):
|
||||
name: Required[str]
|
||||
description: str
|
||||
parameters: Schema
|
||||
response: Schema
|
||||
|
||||
|
||||
class FunctionCallingConfig(TypedDict, total=False):
|
||||
mode: Literal["ANY", "AUTO", "NONE"]
|
||||
allowed_function_names: List[str]
|
||||
|
||||
|
||||
HarmCategory = Literal[
|
||||
"HARM_CATEGORY_UNSPECIFIED",
|
||||
"HARM_CATEGORY_HATE_SPEECH",
|
||||
"HARM_CATEGORY_DANGEROUS_CONTENT",
|
||||
"HARM_CATEGORY_HARASSMENT",
|
||||
"HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
||||
]
|
||||
HarmBlockThreshold = Literal[
|
||||
"HARM_BLOCK_THRESHOLD_UNSPECIFIED",
|
||||
"BLOCK_LOW_AND_ABOVE",
|
||||
"BLOCK_MEDIUM_AND_ABOVE",
|
||||
"BLOCK_ONLY_HIGH",
|
||||
"BLOCK_NONE",
|
||||
]
|
||||
HarmBlockMethod = Literal["HARM_BLOCK_METHOD_UNSPECIFIED", "SEVERITY", "PROBABILITY"]
|
||||
|
||||
HarmProbability = Literal[
|
||||
"HARM_PROBABILITY_UNSPECIFIED", "NEGLIGIBLE", "LOW", "MEDIUM", "HIGH"
|
||||
]
|
||||
|
||||
HarmSeverity = Literal[
|
||||
"HARM_SEVERITY_UNSPECIFIED",
|
||||
"HARM_SEVERITY_NEGLIGIBLE",
|
||||
"HARM_SEVERITY_LOW",
|
||||
"HARM_SEVERITY_MEDIUM",
|
||||
"HARM_SEVERITY_HIGH",
|
||||
]
|
||||
|
||||
|
||||
class SafetSettingsConfig(TypedDict, total=False):
|
||||
category: HarmCategory
|
||||
threshold: HarmBlockThreshold
|
||||
max_influential_terms: int
|
||||
method: HarmBlockMethod
|
||||
|
||||
|
||||
class GenerationConfig(TypedDict, total=False):
|
||||
temperature: float
|
||||
top_p: float
|
||||
top_k: float
|
||||
candidate_count: int
|
||||
max_output_tokens: int
|
||||
stop_sequences: List[str]
|
||||
presence_penalty: float
|
||||
frequency_penalty: float
|
||||
response_mime_type: Literal["text/plain", "application/json"]
|
||||
|
||||
|
||||
class Tools(TypedDict):
|
||||
function_declarations: List[FunctionDeclaration]
|
||||
|
||||
|
||||
class ToolConfig(TypedDict):
|
||||
functionCallingConfig: FunctionCallingConfig
|
||||
|
||||
|
||||
class RequestBody(TypedDict, total=False):
|
||||
contents: Required[List[ContentType]]
|
||||
system_instruction: SystemInstructions
|
||||
tools: Tools
|
||||
toolConfig: ToolConfig
|
||||
safetySettings: SafetSettingsConfig
|
||||
generationConfig: GenerationConfig
|
||||
|
||||
|
||||
class SafetyRatings(TypedDict):
|
||||
category: HarmCategory
|
||||
probability: HarmProbability
|
||||
probabilityScore: int
|
||||
severity: HarmSeverity
|
||||
blocked: bool
|
||||
|
||||
|
||||
class Date(TypedDict):
|
||||
year: int
|
||||
month: int
|
||||
date: int
|
||||
|
||||
|
||||
class Citation(TypedDict):
|
||||
startIndex: int
|
||||
endIndex: int
|
||||
uri: str
|
||||
title: str
|
||||
license: str
|
||||
publicationDate: Date
|
||||
|
||||
|
||||
class CitationMetadata(TypedDict):
|
||||
citations: List[Citation]
|
||||
|
||||
|
||||
class SearchEntryPoint(TypedDict, total=False):
|
||||
renderedContent: str
|
||||
sdkBlob: str
|
||||
|
||||
|
||||
class GroundingMetadata(TypedDict, total=False):
|
||||
webSearchQueries: List[str]
|
||||
searchEntryPoint: SearchEntryPoint
|
||||
|
||||
|
||||
class Candidates(TypedDict, total=False):
|
||||
index: int
|
||||
content: HttpxContentType
|
||||
finishReason: Literal[
|
||||
"FINISH_REASON_UNSPECIFIED",
|
||||
"STOP",
|
||||
"MAX_TOKENS",
|
||||
"SAFETY",
|
||||
"RECITATION",
|
||||
"OTHER",
|
||||
"BLOCKLIST",
|
||||
"PROHIBITED_CONTENT",
|
||||
"SPII",
|
||||
]
|
||||
safetyRatings: SafetyRatings
|
||||
citationMetadata: CitationMetadata
|
||||
groundingMetadata: GroundingMetadata
|
||||
finishMessage: str
|
||||
|
||||
|
||||
class PromptFeedback(TypedDict):
|
||||
blockReason: str
|
||||
safetyRatings: List[SafetyRatings]
|
||||
blockReasonMessage: str
|
||||
|
||||
|
||||
class UsageMetadata(TypedDict):
|
||||
promptTokenCount: int
|
||||
totalTokenCount: int
|
||||
candidatesTokenCount: int
|
||||
|
||||
|
||||
class GenerateContentResponseBody(TypedDict, total=False):
|
||||
candidates: Required[List[Candidates]]
|
||||
promptFeedback: PromptFeedback
|
||||
usageMetadata: Required[UsageMetadata]
|
||||
|
|
|
@@ -1,6 +1,8 @@
from typing import List, Optional, Union, Dict, Tuple, Literal
from typing_extensions import TypedDict
from enum import Enum
from typing_extensions import override, Required, Dict
from .llms.openai import ChatCompletionUsageBlock, ChatCompletionToolCallChunk


class LiteLLMCommonStrings(Enum):

@@ -37,3 +39,12 @@ class ModelInfo(TypedDict):
        "completion", "embedding", "image_generation", "chat", "audio_transcription"
    ]
    supported_openai_params: Optional[List[str]]


class GenericStreamingChunk(TypedDict):
    text: Required[str]
    tool_use: Optional[ChatCompletionToolCallChunk]
    is_finished: Required[bool]
    finish_reason: Required[str]
    usage: Optional[ChatCompletionUsageBlock]
    index: int
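A minimal sketch (not part of this diff): a provider-agnostic chunk shaped like the `GenericStreamingChunk` TypedDict above, as consumed by the `vertex_ai_beta` streaming branch further down in this diff. The literal values are assumptions for illustration.

```python
from litellm.types.utils import GenericStreamingChunk

chunk: GenericStreamingChunk = {
    "text": "Hello",
    "tool_use": None,
    "is_finished": True,
    "finish_reason": "stop",
    "usage": {"prompt_tokens": 3, "completion_tokens": 1, "total_tokens": 4},
    "index": 0,
}
```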
litellm/utils.py (203 lines changed)
@@ -35,6 +35,9 @@ import litellm._service_logger  # for storing API inputs, outputs, and metadata
from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
from litellm.caching import DualCache
from litellm.types.utils import CostPerToken, ProviderField, ModelInfo
from litellm.litellm_core_utils.redact_messages import (
    redact_message_input_output_from_logging,
)

oidc_cache = DualCache()

@@ -518,15 +521,18 @@ class Choices(OpenAIObject):
        self,
        finish_reason=None,
        index=0,
        message=None,
        message: Optional[Union[Message, dict]] = None,
        logprobs=None,
        enhancements=None,
        **params,
    ):
        super(Choices, self).__init__(**params)
        self.finish_reason = (
            map_finish_reason(finish_reason) or "stop"
        if finish_reason is not None:
            self.finish_reason = map_finish_reason(
                finish_reason
            )  # set finish_reason for all responses
        else:
            self.finish_reason = "stop"
        self.index = index
        if message is None:
            self.message = Message()
@@ -1134,13 +1140,15 @@ class TranscriptionResponse(OpenAIObject):
def print_verbose(
    print_statement,
    logger_only: bool = False,
    log_level: Literal["DEBUG", "INFO"] = "DEBUG",
    log_level: Literal["DEBUG", "INFO", "ERROR"] = "DEBUG",
):
    try:
        if log_level == "DEBUG":
            verbose_logger.debug(print_statement)
        elif log_level == "INFO":
            verbose_logger.info(print_statement)
        elif log_level == "ERROR":
            verbose_logger.error(print_statement)
        if litellm.set_verbose == True and logger_only == False:
            print(print_statement)  # noqa
    except:
@ -1473,7 +1481,9 @@ class Logging:
|
|||
print_verbose(
|
||||
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
|
||||
)
|
||||
self.redact_message_input_output_from_logging(result=original_response)
|
||||
original_response = redact_message_input_output_from_logging(
|
||||
litellm_logging_obj=self, result=original_response
|
||||
)
|
||||
# Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
|
||||
|
||||
callbacks = litellm.input_callback + self.dynamic_input_callbacks
|
||||
|
@ -1624,6 +1634,12 @@ class Logging:
|
|||
end_time=end_time,
|
||||
)
|
||||
except Exception as e:
|
||||
print_verbose(
|
||||
"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}\n{}".format(
|
||||
str(e), traceback.format_exc()
|
||||
),
|
||||
log_level="ERROR",
|
||||
)
|
||||
complete_streaming_response = None
|
||||
else:
|
||||
self.sync_streaming_chunks.append(result)
|
||||
|
@ -1664,7 +1680,9 @@ class Logging:
|
|||
else:
|
||||
callbacks = litellm.success_callback
|
||||
|
||||
self.redact_message_input_output_from_logging(result=result)
|
||||
result = redact_message_input_output_from_logging(
|
||||
result=result, litellm_logging_obj=self
|
||||
)
|
||||
|
||||
for callback in callbacks:
|
||||
try:
|
||||
|
@ -2214,7 +2232,10 @@ class Logging:
|
|||
capture_exception(e)
|
||||
except:
|
||||
print_verbose(
|
||||
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
|
||||
"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {}\n{}".format(
|
||||
str(e), traceback.format_exc()
|
||||
),
|
||||
log_level="ERROR",
|
||||
)
|
||||
pass
|
||||
|
||||
|
@ -2224,7 +2245,7 @@ class Logging:
|
|||
"""
|
||||
Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions.
|
||||
"""
|
||||
print_verbose(f"Logging Details LiteLLM-Async Success Call")
|
||||
print_verbose("Logging Details LiteLLM-Async Success Call")
|
||||
start_time, end_time, result = self._success_handler_helper_fn(
|
||||
start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit
|
||||
)
|
||||
|
@ -2243,7 +2264,10 @@ class Logging:
|
|||
)
|
||||
except Exception as e:
|
||||
print_verbose(
|
||||
f"Error occurred building stream chunk: {traceback.format_exc()}"
|
||||
"Error occurred building stream chunk in success logging: {}\n{}".format(
|
||||
str(e), traceback.format_exc()
|
||||
),
|
||||
log_level="ERROR",
|
||||
)
|
||||
complete_streaming_response = None
|
||||
else:
|
||||
|
@ -2254,7 +2278,7 @@ class Logging:
|
|||
complete_streaming_response
|
||||
)
|
||||
try:
|
||||
if self.model_call_details.get("cache_hit", False) == True:
|
||||
if self.model_call_details.get("cache_hit", False) is True:
|
||||
self.model_call_details["response_cost"] = 0.0
|
||||
else:
|
||||
# check if base_model set on azure
|
||||
|
@ -2270,8 +2294,8 @@ class Logging:
|
|||
f"Model={self.model}; cost={self.model_call_details['response_cost']}"
|
||||
)
|
||||
except litellm.NotFoundError as e:
|
||||
verbose_logger.debug(
|
||||
f"Model={self.model} not found in completion cost map."
|
||||
verbose_logger.error(
|
||||
f"Model={self.model} not found in completion cost map. Setting 'response_cost' to None"
|
||||
)
|
||||
self.model_call_details["response_cost"] = None
|
||||
|
||||
|
@ -2291,7 +2315,9 @@ class Logging:
|
|||
else:
|
||||
callbacks = litellm._async_success_callback
|
||||
|
||||
self.redact_message_input_output_from_logging(result=result)
|
||||
result = redact_message_input_output_from_logging(
|
||||
result=result, litellm_logging_obj=self
|
||||
)
|
||||
|
||||
for callback in callbacks:
|
||||
# check if callback can run for this request
|
||||
|
@ -2501,7 +2527,9 @@ class Logging:
|
|||
|
||||
result = None # result sent to all loggers, init this to None incase it's not created
|
||||
|
||||
self.redact_message_input_output_from_logging(result=result)
|
||||
result = redact_message_input_output_from_logging(
|
||||
result=result, litellm_logging_obj=self
|
||||
)
|
||||
for callback in callbacks:
|
||||
try:
|
||||
if callback == "lite_debugger":
|
||||
|
@ -2725,41 +2753,6 @@ class Logging:
|
|||
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
|
||||
)
|
||||
|
||||
def redact_message_input_output_from_logging(self, result):
|
||||
"""
|
||||
Removes messages, prompts, input, response from logging. This modifies the data in-place
|
||||
only redacts when litellm.turn_off_message_logging == True
|
||||
"""
|
||||
# check if user opted out of logging message/response to callbacks
|
||||
if litellm.turn_off_message_logging is True:
|
||||
# remove messages, prompts, input, response from logging
|
||||
self.model_call_details["messages"] = [
|
||||
{"role": "user", "content": "redacted-by-litellm"}
|
||||
]
|
||||
self.model_call_details["prompt"] = ""
|
||||
self.model_call_details["input"] = ""
|
||||
|
||||
# response cleaning
|
||||
# ChatCompletion Responses
|
||||
if self.stream and "complete_streaming_response" in self.model_call_details:
|
||||
_streaming_response = self.model_call_details[
|
||||
"complete_streaming_response"
|
||||
]
|
||||
for choice in _streaming_response.choices:
|
||||
if isinstance(choice, litellm.Choices):
|
||||
choice.message.content = "redacted-by-litellm"
|
||||
elif isinstance(choice, litellm.utils.StreamingChoices):
|
||||
choice.delta.content = "redacted-by-litellm"
|
||||
else:
|
||||
if result is not None:
|
||||
if isinstance(result, litellm.ModelResponse):
|
||||
if hasattr(result, "choices") and result.choices is not None:
|
||||
for choice in result.choices:
|
||||
if isinstance(choice, litellm.Choices):
|
||||
choice.message.content = "redacted-by-litellm"
|
||||
elif isinstance(choice, litellm.utils.StreamingChoices):
|
||||
choice.delta.content = "redacted-by-litellm"
|
||||
|
||||
|
||||
def exception_logging(
|
||||
additional_args={},
|
||||
|
@ -2822,7 +2815,9 @@ class Rules:
|
|||
raise litellm.APIResponseValidationError(message="LLM Response failed post-call-rule check", llm_provider="", model=model) # type: ignore
|
||||
return True
|
||||
|
||||
def post_call_rules(self, input: str, model: str):
|
||||
def post_call_rules(self, input: Optional[str], model: str) -> bool:
|
||||
if input is None:
|
||||
return True
|
||||
for rule in litellm.post_call_rules:
|
||||
if callable(rule):
|
||||
decision = rule(input)
|
||||
|
@ -3101,9 +3096,9 @@ def client(original_function):
|
|||
pass
|
||||
else:
|
||||
if isinstance(original_response, ModelResponse):
|
||||
model_response = original_response["choices"][0]["message"][
|
||||
"content"
|
||||
]
|
||||
model_response = original_response.choices[
|
||||
0
|
||||
].message.content
|
||||
### POST-CALL RULES ###
|
||||
rules_obj.post_call_rules(input=model_response, model=model)
|
||||
except Exception as e:
|
||||
|
@ -3563,7 +3558,7 @@ def client(original_function):
|
|||
if cached_result is not None and not isinstance(
|
||||
cached_result, list
|
||||
):
|
||||
print_verbose(f"Cache Hit!")
|
||||
print_verbose("Cache Hit!", log_level="INFO")
|
||||
cache_hit = True
|
||||
end_time = datetime.datetime.now()
|
||||
(
|
||||
|
@@ -4898,6 +4893,18 @@ def get_optional_params_embeddings(
        )
        final_params = {**optional_params, **kwargs}
        return final_params
    if custom_llm_provider == "vertex_ai":
        supported_params = get_supported_openai_params(
            model=model,
            custom_llm_provider="vertex_ai",
            request_type="embeddings",
        )
        _check_valid_arg(supported_params=supported_params)
        optional_params = litellm.VertexAITextEmbeddingConfig().map_openai_params(
            non_default_params=non_default_params, optional_params={}
        )
        final_params = {**optional_params, **kwargs}
        return final_params
    if custom_llm_provider == "vertex_ai":
        if len(non_default_params.keys()) > 0:
            if litellm.drop_params is True:  # drop the unsupported non-default values
@@ -4931,7 +4938,18 @@ def get_optional_params_embeddings(
            message=f"Setting user/encoding format is not supported by {custom_llm_provider}. To drop it from the call, set `litellm.drop_params = True`.",
        )
        return {**non_default_params, **kwargs}

    if custom_llm_provider == "mistral":
        supported_params = get_supported_openai_params(
            model=model,
            custom_llm_provider="mistral",
            request_type="embeddings",
        )
        _check_valid_arg(supported_params=supported_params)
        optional_params = litellm.MistralEmbeddingConfig().map_openai_params(
            non_default_params=non_default_params, optional_params={}
        )
        final_params = {**optional_params, **kwargs}
        return final_params
    if (
        custom_llm_provider != "openai"
        and custom_llm_provider != "azure"
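A minimal sketch (not part of this diff) of the SDK call path the new mistral embedding branch serves; the API key value is a placeholder assumption.

```python
import os
import litellm

os.environ["MISTRAL_API_KEY"] = "sk-..."  # placeholder

response = litellm.embedding(
    model="mistral/mistral-embed",
    input=["hello world"],
)
print(response.usage.prompt_tokens)
```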
@@ -5381,6 +5399,16 @@ def get_optional_params(
        print_verbose(
            f"(end) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK - optional_params: {optional_params}"
        )
    elif custom_llm_provider == "vertex_ai_beta":
        supported_params = get_supported_openai_params(
            model=model, custom_llm_provider=custom_llm_provider
        )
        _check_valid_arg(supported_params=supported_params)
        optional_params = litellm.VertexGeminiConfig().map_openai_params(
            non_default_params=non_default_params,
            optional_params=optional_params,
            model=model,
        )
    elif (
        custom_llm_provider == "vertex_ai" and model in litellm.vertex_anthropic_models
    ):
@@ -6340,7 +6368,10 @@ def get_supported_openai_params(
            "max_retries",
        ]
    elif custom_llm_provider == "mistral":
        if request_type == "chat_completion":
            return litellm.MistralConfig().get_supported_openai_params()
        elif request_type == "embeddings":
            return litellm.MistralEmbeddingConfig().get_supported_openai_params()
    elif custom_llm_provider == "replicate":
        return [
            "stream",

@@ -6382,7 +6413,10 @@ def get_supported_openai_params(
    elif custom_llm_provider == "palm" or custom_llm_provider == "gemini":
        return ["temperature", "top_p", "stream", "n", "stop", "max_tokens"]
    elif custom_llm_provider == "vertex_ai":
        if request_type == "chat_completion":
            return litellm.VertexAIConfig().get_supported_openai_params()
        elif request_type == "embeddings":
            return litellm.VertexAITextEmbeddingConfig().get_supported_openai_params()
    elif custom_llm_provider == "sagemaker":
        return ["stream", "temperature", "max_tokens", "top_p", "stop", "n"]
    elif custom_llm_provider == "aleph_alpha":
@@ -6919,13 +6953,14 @@ def get_max_tokens(model: str):
        )


def get_model_info(model: str) -> ModelInfo:
def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
    """
    Get a dict for the maximum tokens (context window),
    input_cost_per_token, output_cost_per_token for a given model.

    Parameters:
    model (str): The name of the model.
    - model (str): The name of the model.
    - custom_llm_provider (str | null): the provider used for the model. If provided, used to check if the litellm model info is for that provider.

    Returns:
        dict: A dictionary containing the following information:

@@ -6979,12 +7014,14 @@ def get_model_info(model: str) -> ModelInfo:
        if model in azure_llms:
            model = azure_llms[model]
        ##########################
        if custom_llm_provider is None:
            # Get custom_llm_provider
            split_model, custom_llm_provider = model, ""
            try:
                split_model, custom_llm_provider, _, _ = get_llm_provider(model=model)
            except:
                pass
        else:
            split_model = model
        #########################

        supported_openai_params = litellm.get_supported_openai_params(

@@ -7009,10 +7046,20 @@ def get_model_info(model: str) -> ModelInfo:
        if model in litellm.model_cost:
            _model_info = litellm.model_cost[model]
            _model_info["supported_openai_params"] = supported_openai_params
            if (
                "litellm_provider" in _model_info
                and _model_info["litellm_provider"] != custom_llm_provider
            ):
                raise Exception
            return _model_info
        if split_model in litellm.model_cost:
            _model_info = litellm.model_cost[split_model]
            _model_info["supported_openai_params"] = supported_openai_params
            if (
                "litellm_provider" in _model_info
                and _model_info["litellm_provider"] != custom_llm_provider
            ):
                raise Exception
            return _model_info
        else:
            raise ValueError(

@@ -7192,6 +7239,9 @@ def get_provider_fields(custom_llm_provider: str) -> List[ProviderField]:
    elif custom_llm_provider == "ollama":
        return litellm.OllamaConfig().get_required_params()

    elif custom_llm_provider == "azure_ai":
        return litellm.AzureAIStudioConfig().get_required_params()

    else:
        return []
@@ -10066,6 +10116,14 @@ def get_secret(
                return oidc_token
            else:
                raise ValueError("Github OIDC provider failed")
        elif oidc_provider == "azure":
            # https://azure.github.io/azure-workload-identity/docs/quick-start.html
            azure_federated_token_file = os.getenv("AZURE_FEDERATED_TOKEN_FILE")
            if azure_federated_token_file is None:
                raise ValueError("AZURE_FEDERATED_TOKEN_FILE not found in environment")
            with open(azure_federated_token_file, "r") as f:
                oidc_token = f.read()
            return oidc_token
        else:
            raise ValueError("Unsupported OIDC provider")
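A minimal sketch (not part of this diff), assuming Azure Workload Identity has projected a token file and set `AZURE_FEDERATED_TOKEN_FILE`; the token file path and the portion of the secret string after the `oidc/azure/` prefix are illustrative assumptions.

```python
import os
from litellm.utils import get_secret

# Set by Azure Workload Identity in-cluster; shown here only for illustration.
os.environ["AZURE_FEDERATED_TOKEN_FILE"] = "/var/run/secrets/azure/tokens/azure-identity-token"

# With the new branch above, an oidc/azure secret resolves to the projected token's contents.
token = get_secret("oidc/azure/")
```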
@ -11218,6 +11276,34 @@ class CustomStreamWrapper:
|
|||
)
|
||||
else:
|
||||
completion_obj["content"] = str(chunk)
|
||||
elif self.custom_llm_provider and (
|
||||
self.custom_llm_provider == "vertex_ai_beta"
|
||||
):
|
||||
from litellm.types.utils import (
|
||||
GenericStreamingChunk as UtilsStreamingChunk,
|
||||
)
|
||||
|
||||
if self.received_finish_reason is not None:
|
||||
raise StopIteration
|
||||
response_obj: UtilsStreamingChunk = chunk
|
||||
completion_obj["content"] = response_obj["text"]
|
||||
if response_obj["is_finished"]:
|
||||
self.received_finish_reason = response_obj["finish_reason"]
|
||||
|
||||
if (
|
||||
self.stream_options
|
||||
and self.stream_options.get("include_usage", False) is True
|
||||
and response_obj["usage"] is not None
|
||||
):
|
||||
self.sent_stream_usage = True
|
||||
model_response.usage = litellm.Usage(
|
||||
prompt_tokens=response_obj["usage"]["prompt_tokens"],
|
||||
completion_tokens=response_obj["usage"]["completion_tokens"],
|
||||
total_tokens=response_obj["usage"]["total_tokens"],
|
||||
)
|
||||
|
||||
if "tool_use" in response_obj and response_obj["tool_use"] is not None:
|
||||
completion_obj["tool_calls"] = [response_obj["tool_use"]]
|
||||
elif self.custom_llm_provider and (self.custom_llm_provider == "vertex_ai"):
|
||||
import proto # type: ignore
|
||||
|
||||
|
@ -11895,6 +11981,7 @@ class CustomStreamWrapper:
|
|||
or self.custom_llm_provider == "ollama"
|
||||
or self.custom_llm_provider == "ollama_chat"
|
||||
or self.custom_llm_provider == "vertex_ai"
|
||||
or self.custom_llm_provider == "vertex_ai_beta"
|
||||
or self.custom_llm_provider == "sagemaker"
|
||||
or self.custom_llm_provider == "gemini"
|
||||
or self.custom_llm_provider == "replicate"
|
||||
|
|
log.txt (new file, 10 lines)
@@ -0,0 +1,10 @@
============================= test session starts ==============================
platform darwin -- Python 3.11.4, pytest-8.2.0, pluggy-1.5.0 -- /Users/krrishdholakia/Documents/litellm/litellm/proxy/myenv/bin/python3.11
cachedir: .pytest_cache
rootdir: /Users/krrishdholakia/Documents/litellm
configfile: pyproject.toml
plugins: logfire-0.35.0, asyncio-0.23.6, mock-3.14.0, anyio-4.2.0
asyncio: mode=Mode.STRICT
collecting ... collected 0 items

============================ no tests ran in 0.00s =============================
@@ -3347,6 +3347,24 @@
        "litellm_provider": "deepinfra",
        "mode": "chat"
    },
    "deepinfra/meta-llama/Meta-Llama-3-8B-Instruct": {
        "max_tokens": 8191,
        "max_input_tokens": 8191,
        "max_output_tokens": 4096,
        "input_cost_per_token": 0.00000008,
        "output_cost_per_token": 0.00000008,
        "litellm_provider": "deepinfra",
        "mode": "chat"
    },
    "deepinfra/meta-llama/Meta-Llama-3-70B-Instruct": {
        "max_tokens": 8191,
        "max_input_tokens": 8191,
        "max_output_tokens": 4096,
        "input_cost_per_token": 0.00000059,
        "output_cost_per_token": 0.00000079,
        "litellm_provider": "deepinfra",
        "mode": "chat"
    },
    "deepinfra/01-ai/Yi-34B-200K": {
        "max_tokens": 4096,
        "max_input_tokens": 200000,
@@ -85,6 +85,9 @@ model_list:
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY
  - model_name: mistral-embed
    litellm_params:
      model: mistral/mistral-embed
  - model_name: gpt-instruct  # [PROD TEST] - tests if `/health` automatically infers this to be a text completion model
    litellm_params:
      model: text-completion-openai/gpt-3.5-turbo-instruct
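A minimal sketch (not part of this diff) of exercising the new `mistral-embed` proxy model with the OpenAI SDK; the base URL and key are the placeholder values used elsewhere in the repo.

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.embeddings.create(model="mistral-embed", input=["hello world"])
print(len(response.data[0].embedding))
```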
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.40.9"
version = "1.40.12"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"

@@ -85,7 +85,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.40.9"
version = "1.40.12"
version_files = [
    "pyproject.toml:^version"
]
@@ -22,6 +22,7 @@ async def generate_key(
        "text-embedding-ada-002",
        "dall-e-2",
        "fake-openai-endpoint-2",
        "mistral-embed",
    ],
):
    url = "http://0.0.0.0:4000/key/generate"

@@ -197,14 +198,14 @@ async def completion(session, key):
        return response


async def embeddings(session, key):
async def embeddings(session, key, model="text-embedding-ada-002"):
    url = "http://0.0.0.0:4000/embeddings"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": "text-embedding-ada-002",
        "model": model,
        "input": ["hello world"],
    }

@@ -408,6 +409,9 @@ async def test_embeddings():
        key_2 = key_gen["key"]
        await embeddings(session=session, key=key_2)

        # embedding request with non OpenAI model
        await embeddings(session=session, key=key, model="mistral-embed")


@pytest.mark.asyncio
async def test_image_generation():
@@ -1 +0,0 @@
self.__BUILD_MANIFEST={__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/_error":["static/chunks/pages/_error-d6107f1aac0c574c.js"],sortedPages:["/_app","/_error"]},self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();

@@ -1 +0,0 @@
self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()