Merge branch 'main' into feat/friendliai

This commit is contained in:
Wonseok Lee (Jack) 2024-06-13 09:59:56 +09:00 committed by GitHub
commit 776c75c1e5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
99 changed files with 202794 additions and 632 deletions

View file

@ -202,6 +202,7 @@ jobs:
-e REDIS_PORT=$REDIS_PORT \
-e AZURE_FRANCE_API_KEY=$AZURE_FRANCE_API_KEY \
-e AZURE_EUROPE_API_KEY=$AZURE_EUROPE_API_KEY \
-e MISTRAL_API_KEY=$MISTRAL_API_KEY \
-e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-e AWS_REGION_NAME=$AWS_REGION_NAME \

View file

@ -150,7 +150,7 @@ $ litellm --config /path/to/config.yaml
```bash
curl "http://0.0.0.0:4000/v1/assistants?order=desc&limit=20" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234"
```
**Create a Thread**
@ -162,6 +162,14 @@ curl http://0.0.0.0:4000/v1/threads \
-d ''
```
**Get a Thread**
```bash
curl http://0.0.0.0:4000/v1/threads/{thread_id} \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234"
```
**Add Messages to the Thread**
```bash

View file

@ -212,6 +212,94 @@ If you run the code two times, response1 will use the cache from the first run t
</TabItem>
</Tabs>
## Switch Cache On / Off Per LiteLLM Call
LiteLLM supports 4 cache-controls:
- `no-cache`: *Optional(bool)* When `True`, will not return a cached response; the actual endpoint is called instead.
- `no-store`: *Optional(bool)* When `True`, will not cache the response.
- `ttl`: *Optional(int)* Will cache the response for the user-defined amount of time (in seconds).
- `s-maxage`: *Optional(int)* Will only accept cached responses that are within the user-defined range (in seconds).
[Let us know if you need more](https://github.com/BerriAI/litellm/issues/1218)
<Tabs>
<TabItem value="no-cache" label="No-Cache">
Example usage `no-cache` - When `True`, Will not return a cached response
```python
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[
{
"role": "user",
"content": "hello who are you"
}
],
cache={"no-cache": True},
)
```
</TabItem>
<TabItem value="no-store" label="No-Store">
Example usage `no-store` - When `True`, Will not cache the response.
```python
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[
{
"role": "user",
"content": "hello who are you"
}
],
cache={"no-store": True},
)
```
</TabItem>
<TabItem value="ttl" label="ttl">
Example usage `ttl` - cache the response for 10 seconds
```python
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[
{
"role": "user",
"content": "hello who are you"
}
],
cache={"ttl": 10},
)
```
</TabItem>
<TabItem value="s-maxage" label="s-maxage">
Example usage `s-maxage` - Will only accept cached responses for 60 seconds
```python
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[
{
"role": "user",
"content": "hello who are you"
}
],
cache={"s-maxage": 60},
)
```
</TabItem>
</Tabs>
## Cache Context Manager - Enable, Disable, Update Cache

View file

@ -0,0 +1,46 @@
import Image from '@theme/IdealImage';
# Raw Request/Response Logging
See the raw request/response sent by LiteLLM in your logging provider (OTEL/Langfuse/etc.).
**on SDK**
```python
# pip install langfuse
import litellm
import os
# log raw request/response
litellm.log_raw_request_response = True
# from https://cloud.langfuse.com/
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
# Optional, defaults to https://cloud.langfuse.com
# os.environ["LANGFUSE_HOST"] = "" # optional
# LLM API Keys
os.environ['OPENAI_API_KEY']=""
# set langfuse as a callback, litellm will send the data to langfuse
litellm.success_callback = ["langfuse"]
# openai call
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[
{"role": "user", "content": "Hi 👋 - i'm openai"}
]
)
```
**on Proxy**
```yaml
litellm_settings:
log_raw_request_response: True
```
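With the proxy setting above, any standard request sent to the proxy will have its raw request/response attached to the logged trace. A minimal sketch (assuming the proxy is running on `0.0.0.0:4000` with a virtual key `sk-1234` and a `gpt-3.5-turbo` deployment configured):
```shell
curl http://0.0.0.0:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "hello who are you"}]
  }'
```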
**Expected Log**
<Image img={require('../../img/raw_request_log.png')}/>

View file

@ -1,3 +1,5 @@
# llmcord.py
llmcord.py lets you and your friends chat with LLMs directly in your Discord server. It works with practically any LLM, remote or locally hosted.
Github: https://github.com/jakobdylanc/discord-llm-chatbot

View file

@ -11,7 +11,7 @@ LiteLLM supports
:::info
Anthropic API fails requests when `max_tokens` are not passed. Due to this litellm passes `max_tokens=4096` when no `max_tokens` are passed.
:::
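If you want a different limit, a minimal sketch of passing `max_tokens` explicitly (same `completion` call as the examples in this doc; the value here is only illustrative):
```python
from litellm import completion

response = completion(
    model="anthropic/claude-3-opus-20240229",
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=1024,  # overrides litellm's default of 4096
)
```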
@ -229,17 +229,6 @@ assert isinstance(
```
### Setting `anthropic-beta` Header in Requests
Pass the `extra_headers` param to litellm; all headers will be forwarded to the Anthropic API.
```python
response = completion(
    model="anthropic/claude-3-opus-20240229",
    messages=messages,
    tools=tools,
    extra_headers={"anthropic-beta": "<beta-feature-header-value>"},  # placeholder value, forwarded as-is
)
```
### Forcing Anthropic Tool Use

View file

@ -3,53 +3,155 @@ import TabItem from '@theme/TabItem';
# Azure AI Studio
LiteLLM supports all models on Azure AI Studio
## Usage
<Tabs>
<TabItem value="sdk" label="SDK">
### ENV VAR
```python
import os
os.environ["AZURE_AI_API_KEY"] = ""
os.environ["AZURE_AI_API_BASE"] = ""
```
### Example Call
```python
from litellm import completion
import os
## set ENV variables
os.environ["AZURE_AI_API_KEY"] = "azure ai key"
os.environ["AZURE_AI_API_BASE"] = "azure ai base url" # e.g.: https://Mistral-large-dfgfj-serverless.eastus2.inference.ai.azure.com/
# Azure AI command-r-plus call
response = completion(
    model="azure_ai/command-r-plus",
    messages = [{ "content": "Hello, how are you?","role": "user"}]
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Add models to your config.yaml
```yaml
model_list:
  - model_name: command-r-plus
    litellm_params:
      model: azure_ai/command-r-plus
      api_key: os.environ/AZURE_AI_API_KEY
      api_base: os.environ/AZURE_AI_API_BASE
```
2. Start the proxy
```bash
$ litellm --config /path/to/config.yaml --debug
```
3. Send Request to LiteLLM Proxy Server
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
```python
import openai
client = openai.OpenAI(
api_key="sk-1234", # pass litellm proxy key, if you're using virtual keys
base_url="http://0.0.0.0:4000" # litellm-proxy-base url
)
response = client.chat.completions.create(
model="command-r-plus",
messages = [
{
"role": "system",
"content": "Be a good human!"
},
{
"role": "user",
"content": "What do you know about earth?"
}
]
)
print(response)
```
</TabItem>
<TabItem value="curl" label="curl">
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"model": "command-r-plus",
"messages": [
{
"role": "system",
"content": "Be a good human!"
},
{
"role": "user",
"content": "What do you know about earth?"
}
]
}'
```
</TabItem>
</Tabs>
</TabItem>
</Tabs>
## Passing additional params - max_tokens, temperature
See all litellm.completion supported params [here](../completion/input.md#translated-openai-params)
```python
# !pip install litellm
from litellm import completion
import os
## set ENV variables
os.environ["AZURE_AI_API_KEY"] = "azure ai api key"
os.environ["AZURE_AI_API_BASE"] = "azure ai api base"
# command r plus call
response = completion(
model="azure_ai/command-r-plus",
messages = [{ "content": "Hello, how are you?","role": "user"}],
max_tokens=20,
temperature=0.5
)
```
**proxy**
```yaml
model_list:
- model_name: command-r-plus
litellm_params:
model: azure_ai/command-r-plus
api_key: os.environ/AZURE_AI_API_KEY
api_base: os.environ/AZURE_AI_API_BASE
max_tokens: 20
temperature: 0.5
```
2. Start the proxy
```bash
@ -103,9 +205,6 @@ response = litellm.completion(
</Tabs>
## Function Calling
<Tabs>
@ -115,8 +214,8 @@ response = litellm.completion(
from litellm import completion
# set env
os.environ["AZURE_AI_API_KEY"] = "your-api-key"
os.environ["AZURE_AI_API_BASE"] = "your-api-base"
tools = [
    {
@ -141,9 +240,7 @@ tools = [
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
response = completion(
    model="azure_ai/mistral-large-latest",
    messages=messages,
    tools=tools,
    tool_choice="auto",
@ -206,10 +303,12 @@ curl http://0.0.0.0:4000/v1/chat/completions \
## Supported Models
LiteLLM supports **ALL** azure ai models. Here's a few examples:
| Model Name | Function Call |
|--------------------------|-----------------------------------------------------------------|
| Cohere command-r-plus | `completion(model="azure/command-r-plus", messages)` |
| Cohere command-r | `completion(model="azure/command-r", messages)` |
| mistral-large-latest | `completion(model="azure/mistral-large-latest", messages)` |

View file

@ -144,16 +144,135 @@ print(response)
</TabItem>
</Tabs>
## Set temperature, top p, etc.
<Tabs>
<TabItem value="sdk" label="SDK">
```python
import os
from litellm import completion
os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""
response = completion(
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
messages=[{ "content": "Hello, how are you?","role": "user"}],
temperature=0.7,
top_p=1
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
**Set on yaml**
```yaml
model_list:
- model_name: bedrock-claude-v1
litellm_params:
model: bedrock/anthropic.claude-instant-v1
temperature: <your-temp>
top_p: <your-top-p>
```
**Set on request**
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="bedrock-claude-v1", messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
temperature=0.7,
top_p=1
)
print(response)
```
</TabItem>
</Tabs>
## Pass provider-specific params
If you pass a non-openai param to litellm, we'll assume it's provider-specific and send it as a kwarg in the request body. [See more](../completion/input.md#provider-specific-params)
<Tabs>
<TabItem value="sdk" label="SDK">
```python
import os
from litellm import completion
os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""
response = completion(
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
messages=[{ "content": "Hello, how are you?","role": "user"}],
top_k=1 # 👈 PROVIDER-SPECIFIC PARAM
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
**Set on yaml**
```yaml
model_list:
- model_name: bedrock-claude-v1
litellm_params:
model: bedrock/anthropic.claude-instant-v1
top_k: 1 # 👈 PROVIDER-SPECIFIC PARAM
```
**Set on request**
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="bedrock-claude-v1", messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
temperature=0.7,
extra_body={
    "top_k": 1 # 👈 PROVIDER-SPECIFIC PARAM
}
)
print(response)
```
</TabItem>
</Tabs>
## Usage - Function Calling
LiteLLM uses Bedrock's Converse API for making tool calls
```python
from litellm import completion
@ -361,47 +480,6 @@ response = completion(
)
```
### SSO Login (AWS Profile)
- Set `AWS_PROFILE` environment variable
- Make bedrock completion call
@ -464,6 +542,56 @@ response = completion(
)
```
### Passing an external BedrockRuntime.Client as a parameter - Completion()
:::warning
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
:::
Pass an external BedrockRuntime.Client object as a parameter to litellm.completion. Useful when using an AWS credentials profile, SSO session, assumed role session, or if environment variables are not available for auth.
Create a client from session credentials:
```python
import boto3
from litellm import completion
bedrock = boto3.client(
service_name="bedrock-runtime",
region_name="us-east-1",
aws_access_key_id="",
aws_secret_access_key="",
aws_session_token="",
)
response = completion(
model="bedrock/anthropic.claude-instant-v1",
messages=[{ "content": "Hello, how are you?","role": "user"}],
aws_bedrock_client=bedrock,
)
```
Create a client from AWS profile in `~/.aws/config`:
```python
import boto3
from litellm import completion
dev_session = boto3.Session(profile_name="dev-profile")
bedrock = dev_session.client(
service_name="bedrock-runtime",
region_name="us-east-1",
)
response = completion(
model="bedrock/anthropic.claude-instant-v1",
messages=[{ "content": "Hello, how are you?","role": "user"}],
aws_bedrock_client=bedrock,
)
```
## Provisioned throughput models
To use provisioned throughput Bedrock models pass
- `model=bedrock/<base-model>`, example `model=bedrock/anthropic.claude-v2`. Set `model` to any of the [Supported AWS models](#supported-aws-bedrock-models)

View file

@ -1,10 +1,13 @@
# Clarifai
Anthropic, OpenAI, Mistral, Llama and Gemini LLMs are supported on Clarifai.
:::warning
Streaming is not yet supported on using clarifai and litellm. Tracking support here: https://github.com/BerriAI/litellm/issues/4162
:::
## Pre-Requisites
`pip install litellm`
## Required Environment Variables
@ -12,6 +15,7 @@ To obtain your Clarifai Personal access token follow this [link](https://docs.cl
```python
os.environ["CLARIFAI_API_KEY"] = "YOUR_CLARIFAI_PAT" # CLARIFAI_PAT
```
## Usage
@ -68,7 +72,7 @@ Example Usage - Note: liteLLM supports all models deployed on Clarifai
| clarifai/meta.Llama-2.codeLlama-70b-Python | `completion('clarifai/meta.Llama-2.codeLlama-70b-Python', messages)`|
| clarifai/meta.Llama-2.codeLlama-70b-Instruct | `completion('clarifai/meta.Llama-2.codeLlama-70b-Instruct', messages)` |
## Mistral LLMs
| Model Name | Function Call |
|---------------------------------------------|------------------------------------------------------------------------|
| clarifai/mistralai.completion.mixtral-8x22B | `completion('clarifai/mistralai.completion.mixtral-8x22B', messages)` |

View file

@ -125,11 +125,12 @@ See all litellm.completion supported params [here](../completion/input.md#transl
from litellm import completion
import os
## set ENV variables
os.environ["DATABRICKS_API_KEY"] = "databricks key"
os.environ["DATABRICKS_API_BASE"] = "databricks api base"
# databricks dbrx call
response = completion(
    model="databricks/databricks-dbrx-instruct",
    messages = [{ "content": "Hello, how are you?","role": "user"}],
    max_tokens=20,
    temperature=0.5

View file

@ -449,6 +449,54 @@ print(response)
</TabItem>
</Tabs>
## Usage - Function Calling
LiteLLM supports Function Calling for Vertex AI gemini models.
```python
from litellm import completion
import os
# set env
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ".."
os.environ["VERTEX_AI_PROJECT"] = ".."
os.environ["VERTEX_AI_LOCATION"] = ".."
tools = [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
},
}
]
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
response = completion(
model="vertex_ai/gemini-pro-vision",
messages=messages,
tools=tools,
)
# Add any assertions, here to check response args
print(response)
assert isinstance(response.choices[0].message.tool_calls[0].function.name, str)
assert isinstance(
response.choices[0].message.tool_calls[0].function.arguments, str
)
```
## Chat Models
| Model Name | Function Call |
@ -500,6 +548,8 @@ All models listed [here](https://github.com/BerriAI/litellm/blob/57f37f743886a02
| Model Name | Function Call |
|--------------------------|-----------------------------------------------------------------|
| text-embedding-004 | `embedding(model="vertex_ai/text-embedding-004", input)` |
| text-multilingual-embedding-002 | `embedding(model="vertex_ai/text-multilingual-embedding-002", input)` |
| textembedding-gecko | `embedding(model="vertex_ai/textembedding-gecko", input)` |
| textembedding-gecko-multilingual | `embedding(model="vertex_ai/textembedding-gecko-multilingual", input)` |
| textembedding-gecko-multilingual@001 | `embedding(model="vertex_ai/textembedding-gecko-multilingual@001", input)` |
@ -508,6 +558,29 @@ All models listed [here](https://github.com/BerriAI/litellm/blob/57f37f743886a02
| text-embedding-preview-0409 | `embedding(model="vertex_ai/text-embedding-preview-0409", input)` |
| text-multilingual-embedding-preview-0409 | `embedding(model="vertex_ai/text-multilingual-embedding-preview-0409", input)` |
### Advanced Use `task_type` and `title` (Vertex Specific Params)
👉 `task_type` and `title` are vertex specific params
LiteLLM Supported Vertex Specific Params
```python
auto_truncate: Optional[bool] = None
task_type: Optional[Literal["RETRIEVAL_QUERY","RETRIEVAL_DOCUMENT", "SEMANTIC_SIMILARITY", "CLASSIFICATION", "CLUSTERING", "QUESTION_ANSWERING", "FACT_VERIFICATION"]] = None
title: Optional[str] = None # The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
```
**Example Usage with LiteLLM**
```python
response = litellm.embedding(
model="vertex_ai/text-embedding-004",
input=["good morning from litellm", "gm"],
task_type = "RETRIEVAL_DOCUMENT",
dimensions=1,
auto_truncate=True,
)
```
## Image Generation Models
Usage

View file

@ -138,14 +138,22 @@ Navigate to the Usage Tab on the LiteLLM UI (found on https://your-proxy-endpoin
<Image img={require('../../img/admin_ui_spend.png')} />
## API Endpoints to get Spend
#### Getting Spend Reports - To Charge Other Teams, Customers
Use the `/global/spend/report` endpoint to get daily spend report per
- team
- customer [this is `user` passed to `/chat/completions` request](#how-to-track-spend-with-litellm)
<Tabs>
<TabItem value="per team" label="Spend Per Team">
##### Example Request
👉 Key Change: Specify `group_by=team`
```shell
curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30&group_by=team' \
-H 'Authorization: Bearer sk-1234'
```
@ -254,6 +262,69 @@ Output from script
```
</TabItem>
</Tabs>
</TabItem>
<TabItem value="per customer" label="Spend Per Customer">
##### Example Request
👉 Key Change: Specify `group_by=customer`
```shell
curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30&group_by=customer' \
-H 'Authorization: Bearer sk-1234'
```
##### Example Response
```shell
[
{
"group_by_day": "2024-04-30T00:00:00+00:00",
"customers": [
{
"customer": "palantir",
"total_spend": 0.0015265,
"metadata": [ # see the spend by unique(key + model)
{
"model": "gpt-4",
"spend": 0.00123,
"total_tokens": 28,
"api_key": "88dc28.." # the hashed api key
},
{
"model": "gpt-4",
"spend": 0.00123,
"total_tokens": 28,
"api_key": "a73dc2.." # the hashed api key
},
{
"model": "chatgpt-v-2",
"spend": 0.000214,
"total_tokens": 122,
"api_key": "898c28.." # the hashed api key
},
{
"model": "gpt-3.5-turbo",
"spend": 0.0000825,
"total_tokens": 85,
"api_key": "84dc28.." # the hashed api key
}
]
}
]
}
]
```
</TabItem>
</Tabs>
@ -356,4 +427,23 @@ model_list:
## Custom Input/Output Pricing
👉 Head to [Custom Input/Output Pricing](https://docs.litellm.ai/docs/proxy/custom_pricing) to set up custom pricing for your models
## ✨ Custom k,v pairs
Log specific key,value pairs as part of the metadata for a spend log
:::info
Logging specific key,value pairs in spend logs metadata is an enterprise feature. [See here](./enterprise.md#tracking-spend-with-custom-metadata)
:::
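For reference, the request shape is the same `spend_logs_metadata` format used in the custom-metadata tracking example later in this diff; a sketch (an enterprise license is still required for these pairs to be logged):
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "what llm are you"}],
    "metadata": {"spend_logs_metadata": {"hello": "world"}}
}'
```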
## ✨ Custom Tags
:::info
Tracking spend with Custom tags is an enterprise feature. [See here](./enterprise.md#tracking-spend-for-custom-tags)
:::

View file

@ -42,6 +42,14 @@ Set `JSON_LOGS="True"` in your env:
```bash
export JSON_LOGS="True"
```
**OR**
Set `json_logs: true` in your yaml:
```yaml
litellm_settings:
json_logs: true
```
Start proxy
@ -49,4 +57,35 @@ Start proxy
$ litellm
```
The proxy will now write all logs in JSON format.
## Control Log Output
Turn off fastapi's default 'INFO' logs
1. Turn on 'json logs'
```yaml
litellm_settings:
json_logs: true
```
2. Set `LITELLM_LOG` to 'ERROR'
Only get logs if an error occurs.
```bash
LITELLM_LOG="ERROR"
```
3. Start proxy
```bash
$ litellm
```
Expected Output:
```bash
# no info statements
```

View file

@ -1,5 +1,6 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import Image from '@theme/IdealImage';
# 🐳 Docker, Deploying LiteLLM Proxy
@ -537,7 +538,9 @@ ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml
## Advanced Deployment Settings
### 1. Customization of the server root path (custom Proxy base url)
💥 Use this when you want to serve LiteLLM on a custom base url path like `https://localhost:4000/api/v1`
:::info
@ -548,9 +551,29 @@ In a Kubernetes deployment, it's possible to utilize a shared DNS to host multip
Customize the root path to eliminate the need for employing multiple DNS configurations during deployment.
👉 Set `SERVER_ROOT_PATH` in your .env and this will be set as your server root path
```
export SERVER_ROOT_PATH="/api/v1"
```
**Step 1. Run Proxy with `SERVER_ROOT_PATH` set in your env**
```shell
docker run --name litellm-proxy \
-e DATABASE_URL=postgresql://<user>:<password>@<host>:<port>/<dbname> \
-e SERVER_ROOT_PATH="/api/v1" \
-p 4000:4000 \
ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml
```
After running the proxy you can access it on `http://0.0.0.0:4000/api/v1/` (since we set `SERVER_ROOT_PATH="/api/v1"`)
**Step 2. Verify Running on correct path**
<Image img={require('../../img/custom_root_path.png')} />
**That's it**, that's all you need to run the proxy on a custom root path
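As a quick sanity check (a sketch, assuming the container above is running with a virtual key `sk-1234`), requests should now be served under the custom prefix:
```shell
curl http://0.0.0.0:4000/api/v1/chat/completions \
  -H "Authorization: Bearer sk-1234" \
  -H "Content-Type: application/json" \
  -d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hello"}]}'
```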
### 2. Setting SSL Certification
Use this if you need to set SSL certificates for your on-prem litellm proxy.

View file

@ -205,6 +205,146 @@ curl -X GET "http://0.0.0.0:4000/spend/tags" \
```
## Tracking Spend with custom metadata
Requirements:
- Virtual Keys & a database should be set up, see [virtual keys](https://docs.litellm.ai/docs/proxy/virtual_keys)
#### Usage - /chat/completions requests with special spend logs metadata
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
Set `extra_body={"metadata": { }}` to `metadata` you want to pass
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
extra_body={
"metadata": {
"spend_logs_metadata": {
"hello": "world"
}
}
}
)
print(response)
```
</TabItem>
<TabItem value="Curl" label="Curl Request">
Pass `metadata` as part of the request body
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "user",
"content": "what llm are you"
}
],
"metadata": {
"spend_logs_metadata": {
"hello": "world"
}
}
}'
```
</TabItem>
<TabItem value="langchain" label="Langchain">
```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
chat = ChatOpenAI(
openai_api_base="http://0.0.0.0:4000",
model = "gpt-3.5-turbo",
temperature=0.1,
extra_body={
"metadata": {
"spend_logs_metadata": {
"hello": "world"
}
}
}
)
messages = [
SystemMessage(
content="You are a helpful assistant that im using to make a test request to."
),
HumanMessage(
content="test from litellm. tell me why it's amazing in 1 sentence"
),
]
response = chat(messages)
print(response)
```
</TabItem>
</Tabs>
#### Viewing Spend w/ custom metadata
#### `/spend/logs` Request Format
```bash
# request_id is the litellm call id, e.g.: chatcmpl-9ZKMURhVYSi9D6r6PJ9vLcayIK0Vm
curl -X GET "http://0.0.0.0:4000/spend/logs?request_id=<your-call-id>" \
-H "Authorization: Bearer sk-1234"
```
#### `/spend/logs` Response Format
```bash
[
{
"request_id": "chatcmpl-9ZKMURhVYSi9D6r6PJ9vLcayIK0Vm",
"call_type": "acompletion",
"metadata": {
"user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
"user_api_key_alias": null,
"spend_logs_metadata": { # 👈 LOGGED CUSTOM METADATA
"hello": "world"
},
"user_api_key_team_id": null,
"user_api_key_user_id": "116544810872468347480",
"user_api_key_team_alias": null
}
}
]
```
## Enforce Required Params for LLM Requests
Use this when you want to enforce that all requests include certain params, e.g. the `user` and `["metadata"]["generation_name"]` params.

View file

@ -606,6 +606,52 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
** 🎉 Expect to see this trace logged in your OTEL collector**
### Context propagation across Services `Traceparent HTTP Header`
❓ Use this when you want to **pass information about the incoming request in a distributed tracing system**
✅ Key change: Pass the **`traceparent` header** in your requests. [Read more about traceparent headers here](https://uptrace.dev/opentelemetry/opentelemetry-traceparent.html#what-is-traceparent-header)
```curl
traceparent: 00-80e1afed08e019fc1110464cfa66635c-7a085853722dc6d2-01
```
Example Usage
1. Make Request to LiteLLM Proxy with `traceparent` header
```python
import openai
import uuid
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
example_traceparent = f"00-80e1afed08e019fc1110464cfa66635c-02e80198930058d4-01"
extra_headers = {
"traceparent": example_traceparent
}
_trace_id = example_traceparent.split("-")[1]
print("EXTRA HEADERS: ", extra_headers)
print("Trace ID: ", _trace_id)
response = client.chat.completions.create(
model="llama3",
messages=[
{"role": "user", "content": "this is a test request, write a short poem"}
],
extra_headers=extra_headers,
)
print(response)
```
```shell
# EXTRA HEADERS: {'traceparent': '00-80e1afed08e019fc1110464cfa66635c-02e80198930058d4-01'}
# Trace ID: 80e1afed08e019fc1110464cfa66635c
```
2. Lookup Trace ID on OTEL Logger
Search for Trace=`80e1afed08e019fc1110464cfa66635c` on your OTEL Collector
<Image img={require('../../img/otel_parent.png')} />

View file

@ -21,6 +21,7 @@ general_settings:
litellm_settings:
  set_verbose: False # Switch off Debug Logging, ensure your logs do not have any debugging on
  json_logs: true # Get debug logs in json format
```
Set slack webhook url in your env
@ -28,6 +29,11 @@ Set slack webhook url in your env
export SLACK_WEBHOOK_URL="https://hooks.slack.com/services/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"
```
Turn off FASTAPI's default info logs
```bash
export LITELLM_LOG="ERROR"
```
:::info
Need help or want dedicated support? Talk to a founder [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)

View file

@ -2,18 +2,13 @@ import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# 🔥 Load Balancing, Fallbacks, Retries, Timeouts
- Quick Start [load balancing](#test---load-balancing)
- Quick Start [client side fallbacks](#test---client-side-fallbacks)
## Quick Start - Load Balancing
#### Step 1 - Set deployments on config
**Example config below**. Here requests with `model=gpt-3.5-turbo` will be routed across multiple instances of `azure/gpt-3.5-turbo`
```yaml
@ -38,50 +33,214 @@ model_list:
rpm: 1440
```
#### Step 2: Start Proxy with config
```shell
$ litellm --config /path/to/config.yaml
```
### Test - Load Balancing
Here requests with model=gpt-3.5-turbo will be routed across multiple instances of azure/gpt-3.5-turbo
👉 Key Change: `model="gpt-3.5-turbo"`
**Check the `model_id` in Response Headers to make sure the requests are being load balanced**
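One way to read those headers from Python - a sketch; the header name `x-litellm-model-id` is an assumption based on the note above, and `with_raw_response` is the OpenAI v1 client helper for accessing the raw HTTP response:
```python
import openai

client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:4000")

# .with_raw_response exposes the HTTP response, including headers
raw = client.chat.completions.with_raw_response.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "which deployment handled this?"}],
)
print(raw.headers.get("x-litellm-model-id"))  # deployment that served the request
print(raw.parse())  # the parsed ChatCompletion object
```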
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
]
)
print(response)
```
</TabItem>
<TabItem value="Curl" label="Curl Request">
Send the same request with curl
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "gpt-3.5-turbo",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ]
}'
```
</TabItem>
<TabItem value="langchain" label="Langchain">
```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
import os
os.environ["OPENAI_API_KEY"] = "anything"
chat = ChatOpenAI(
openai_api_base="http://0.0.0.0:4000",
model="gpt-3.5-turbo",
)
messages = [
SystemMessage(
content="You are a helpful assistant that im using to make a test request to."
),
HumanMessage(
content="test from litellm. tell me why it's amazing in 1 sentence"
),
]
response = chat(messages)
print(response)
```
</TabItem>
</Tabs>
### Test - Client Side Fallbacks
In this request the following will occur:
1. The request to `model="zephyr-beta"` will fail
2. litellm proxy will loop through all the model_groups specified in `fallbacks=["gpt-3.5-turbo"]`
3. The request to `model="gpt-3.5-turbo"` will succeed and the client making the request will get a response from gpt-3.5-turbo
👉 Key Change: `"fallbacks": ["gpt-3.5-turbo"]`
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
response = client.chat.completions.create(
model="zephyr-beta",
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
extra_body={
"fallbacks": ["gpt-3.5-turbo"]
}
)
print(response)
```
</TabItem>
<TabItem value="Curl" label="Curl Request">
Pass `fallbacks` as part of the request body
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "zephyr-beta",
"messages": [
{
"role": "user",
"content": "what llm are you"
}
],
"fallbacks": ["gpt-3.5-turbo"]
}'
```
</TabItem>
<TabItem value="langchain" label="Langchain">
```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
import os
os.environ["OPENAI_API_KEY"] = "anything"
chat = ChatOpenAI(
openai_api_base="http://0.0.0.0:4000",
model="zephyr-beta",
extra_body={
"fallbacks": ["gpt-3.5-turbo"]
}
)
messages = [
SystemMessage(
content="You are a helpful assistant that im using to make a test request to."
),
HumanMessage(
content="test from litellm. tell me why it's amazing in 1 sentence"
),
]
response = chat(messages)
print(response)
```
</TabItem>
</Tabs>
<!--
### Test it!
In this example it will call `azure/gpt-turbo-small-ca`. Defined in the config on Step 1
```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data-raw '{
    "model": "zephyr-beta", # 👈 MODEL NAME to fallback from
    "messages": [
        {"role": "user", "content": "what color is red"}
    ],
    "mock_testing_fallbacks": true
}'
``` -->
## Advanced
### Fallbacks + Retries + Timeouts + Cooldowns
**Set via config**
```yaml
@ -114,44 +273,7 @@ litellm_settings:
context_window_fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}] # fallback to gpt-3.5-turbo-16k if context window error
allowed_fails: 3 # cooldown model if it fails > 1 call in a minute.
```
### Context Window Fallbacks (Pre-Call Checks + Fallbacks)
**Set dynamically**
```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data ' {
"model": "zephyr-beta",
"messages": [
{
"role": "user",
"content": "what llm are you"
}
],
"fallbacks": [{"zephyr-beta": ["gpt-3.5-turbo"]}],
"context_window_fallbacks": [{"zephyr-beta": ["gpt-3.5-turbo"]}],
"num_retries": 2,
"timeout": 10
}
'
```
### Test it!
```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data-raw '{
"model": "zephyr-beta", # 👈 MODEL NAME to fallback from
"messages": [
{"role": "user", "content": "what color is red"}
],
"mock_testing_fallbacks": true
}'
```
## Advanced - Context Window Fallbacks (Pre-Call Checks + Fallbacks)
**Before call is made** check if a call is within model context window with **`enable_pre_call_checks: true`**.
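The config block for this sits outside the hunk shown here; a minimal sketch of the setting it refers to, assuming the standard `router_settings` key in the proxy config:
```yaml
router_settings:
  enable_pre_call_checks: true # 👈 check deployments before routing the call
```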
@ -287,7 +409,7 @@ print(response)
</Tabs>
### EU-Region Filtering (Pre-Call Checks)
**Before call is made** check if the model deployment is in an allowed (EU) region with **`enable_pre_call_checks: true`**.
@ -350,7 +472,7 @@ print(response)
print(response.headers.get('x-litellm-model-api-base'))
```
### Custom Timeouts, Stream Timeouts - Per Model
For each model you can set `timeout` & `stream_timeout` under `litellm_params`
```yaml
model_list:
@ -379,7 +501,7 @@ $ litellm --config /path/to/config.yaml
```
### Setting Dynamic Timeouts - Per Request
LiteLLM Proxy supports setting a `timeout` per request
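The example request is cut off by the hunk boundary; a sketch of the shape, reusing the `timeout` field from the dynamic-fallbacks curl shown earlier in this file:
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "zephyr-beta",
    "messages": [{"role": "user", "content": "what llm are you"}],
    "timeout": 10
}'
```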

View file

@ -77,6 +77,28 @@ litellm_settings:
#### Step 2: Setup Oauth Client
<Tabs>
<TabItem value="okta" label="Okta SSO">
1. Add Okta credentials to your .env
```bash
GENERIC_CLIENT_ID = "<your-okta-client-id>"
GENERIC_CLIENT_SECRET = "<your-okta-client-secret>"
GENERIC_AUTHORIZATION_ENDPOINT = "<your-okta-domain>/authorize" # https://dev-2kqkcd6lx6kdkuzt.us.auth0.com/authorize
GENERIC_TOKEN_ENDPOINT = "<your-okta-domain>/token" # https://dev-2kqkcd6lx6kdkuzt.us.auth0.com/oauth/token
GENERIC_USERINFO_ENDPOINT = "<your-okta-domain>/userinfo" # https://dev-2kqkcd6lx6kdkuzt.us.auth0.com/userinfo
```
You can get your domain specific auth/token/userinfo endpoints at `<YOUR-OKTA-DOMAIN>/.well-known/openid-configuration`
2. Add proxy url as callback_url on Okta
On Okta, add the 'callback_url' as `<proxy_base_url>/sso/callback`
<Image img={require('../../img/okta_callback_url.png')} />
</TabItem>
<TabItem value="google" label="Google SSO">
- Create a new Oauth 2.0 Client on https://console.cloud.google.com/
@ -115,7 +137,6 @@ MICROSOFT_TENANT="5a39737
</TabItem>
<TabItem value="Generic" label="Generic SSO Provider">
A generic OAuth client that can be used to quickly create support for any OAuth provider with close to no code

View file

@ -63,7 +63,7 @@ You can:
- Add budgets to Teams
#### **Add budgets to teams**
```shell
curl --location 'http://localhost:4000/team/new' \
--header 'Authorization: Bearer <your-master-key>' \
@ -102,6 +102,22 @@ curl --location 'http://localhost:4000/team/new' \
"budget_reset_at": null
}
```
#### **Add budget duration to teams**
`budget_duration`: Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
```
curl 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
    "team_alias": "my-new-team_4",
    "members_with_roles": [{"role": "admin", "user_id": "5c4a0aa3-a1e1-43dc-bd87-3c2da8382a3a"}],
    "budget_duration": "10s"
}'
```
</TabItem>
<TabItem value="per-team-member" label="For Team Members">

Binary file not shown (new image, 151 KiB).

Binary file not shown (new image, 279 KiB).

Binary file not shown (new image, 200 KiB).

Binary file not shown (new image, 168 KiB).

View file

@ -183,6 +183,7 @@ const sidebars = {
label: "Logging & Observability",
items: [
"debugging/local_debugging",
"observability/raw_request_response",
"observability/callbacks",
"observability/custom_callback",
"observability/langfuse_integration",
@ -256,6 +257,7 @@ const sidebars = {
"projects/GPT Migrate",
"projects/YiVal",
"projects/LiteLLM Proxy",
"projects/llm_cord",
],
},
],

View file

@ -60,6 +60,7 @@ _async_failure_callback: List[Callable] = (
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False
log_raw_request_response: bool = False
redact_messages_in_exceptions: Optional[bool] = False
store_audit_logs = False  # Enterprise feature, allow users to see audit logs
## end of callbacks #############
@ -407,6 +408,7 @@ openai_compatible_providers: List = [
"together_ai",
"fireworks_ai",
"friendliai",
"azure_ai",
]
@ -611,6 +613,7 @@ provider_list: List = [
"baseten",
"azure",
"azure_text",
"azure_ai",
"sagemaker",
"bedrock",
"vllm",
@ -765,7 +768,7 @@ from .llms.gemini import GeminiConfig
from .llms.nlp_cloud import NLPCloudConfig
from .llms.aleph_alpha import AlephAlphaConfig
from .llms.petals import PetalsConfig
from .llms.vertex_ai import VertexAIConfig, VertexAITextEmbeddingConfig
from .llms.vertex_ai_anthropic import VertexAIAnthropicConfig
from .llms.sagemaker import SagemakerConfig
from .llms.ollama import OllamaConfig
@ -787,6 +790,7 @@ from .llms.openai import (
OpenAIConfig,
OpenAITextCompletionConfig,
MistralConfig,
MistralEmbeddingConfig,
DeepInfraConfig,
)
from .llms.azure import (

View file

@ -337,8 +337,6 @@ def response_cost_calculator(
and custom_llm_provider is True
): # override defaults if custom pricing is set
base_model = model
elif base_model is None:
base_model = model
# base_model defaults to None if not set on model_info
response_cost = completion_cost(
completion_response=response_object,

View file

@ -337,6 +337,7 @@ class ContextWindowExceededError(BadRequestError): # type: ignore
model=self.model, # type: ignore
llm_provider=self.llm_provider, # type: ignore
response=response,
litellm_debug_info=self.litellm_debug_info,
) # Call the base class constructor with the parameters it needs
def __str__(self):
@ -379,6 +380,7 @@ class RejectedRequestError(BadRequestError): # type: ignore
model=self.model, # type: ignore
llm_provider=self.llm_provider, # type: ignore
response=response,
litellm_debug_info=self.litellm_debug_info,
) # Call the base class constructor with the parameters it needs
def __str__(self):
@ -418,6 +420,7 @@ class ContentPolicyViolationError(BadRequestError): # type: ignore
model=self.model, # type: ignore
llm_provider=self.llm_provider, # type: ignore
response=response,
litellm_debug_info=self.litellm_debug_info,
) # Call the base class constructor with the parameters it needs
def __str__(self):

View file

@ -6,17 +6,23 @@ import litellm
from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_logger
from litellm.types.services import ServiceLoggerPayload
from functools import wraps
from typing import Union, Optional, TYPE_CHECKING, Any
if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span
from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth
from litellm.proxy._types import (
ManagementEndpointLoggingPayload as _ManagementEndpointLoggingPayload,
)
Span = _Span
UserAPIKeyAuth = _UserAPIKeyAuth
ManagementEndpointLoggingPayload = _ManagementEndpointLoggingPayload
else:
Span = Any
UserAPIKeyAuth = Any
ManagementEndpointLoggingPayload = Any
LITELLM_TRACER_NAME = os.getenv("OTEL_TRACER_NAME", "litellm")
@ -247,7 +253,7 @@ class OpenTelemetry(CustomLogger):
span.end(end_time=self._to_ns(end_time))
def set_tools_attributes(self, span: Span, tools):
from litellm.proxy._types import SpanAttributes
import json
if not tools:
@ -272,7 +278,7 @@ class OpenTelemetry(CustomLogger):
pass
def set_attributes(self, span: Span, kwargs, response_obj):
from litellm.proxy._types import SpanAttributes
optional_params = kwargs.get("optional_params", {})
litellm_params = kwargs.get("litellm_params", {}) or {}
@ -407,7 +413,7 @@ class OpenTelemetry(CustomLogger):
)
def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
from litellm.proxy._types import SpanAttributes
optional_params = kwargs.get("optional_params", {})
litellm_params = kwargs.get("litellm_params", {}) or {}
@ -454,6 +460,23 @@ class OpenTelemetry(CustomLogger):
def _get_span_name(self, kwargs):
return LITELLM_REQUEST_SPAN_NAME
def get_traceparent_from_header(self, headers):
if headers is None:
return None
_traceparent = headers.get("traceparent", None)
if _traceparent is None:
return None
from opentelemetry.trace.propagation.tracecontext import (
TraceContextTextMapPropagator,
)
verbose_logger.debug("OpenTelemetry: GOT A TRACEPARENT {}".format(_traceparent))
propagator = TraceContextTextMapPropagator()
_parent_context = propagator.extract(carrier={"traceparent": _traceparent})
verbose_logger.debug("OpenTelemetry: PARENT CONTEXT {}".format(_parent_context))
return _parent_context
def _get_span_context(self, kwargs):
from opentelemetry.trace.propagation.tracecontext import (
TraceContextTextMapPropagator,
@ -545,3 +568,91 @@ class OpenTelemetry(CustomLogger):
self.OTEL_EXPORTER,
)
return BatchSpanProcessor(ConsoleSpanExporter())
async def async_management_endpoint_success_hook(
self,
logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None,
):
from opentelemetry import trace
from datetime import datetime
from opentelemetry.trace import Status, StatusCode
_start_time_ns = logging_payload.start_time
_end_time_ns = logging_payload.end_time
start_time = logging_payload.start_time
end_time = logging_payload.end_time
if isinstance(start_time, float):
_start_time_ns = int(start_time * 1e9)
else:
_start_time_ns = self._to_ns(start_time)
if isinstance(end_time, float):
_end_time_ns = int(end_time * 1e9)
else:
_end_time_ns = self._to_ns(end_time)
if parent_otel_span is not None:
_span_name = logging_payload.route
management_endpoint_span = self.tracer.start_span(
name=_span_name,
context=trace.set_span_in_context(parent_otel_span),
start_time=_start_time_ns,
)
_request_data = logging_payload.request_data
if _request_data is not None:
for key, value in _request_data.items():
management_endpoint_span.set_attribute(f"request.{key}", value)
_response = logging_payload.response
if _response is not None:
for key, value in _response.items():
management_endpoint_span.set_attribute(f"response.{key}", value)
management_endpoint_span.set_status(Status(StatusCode.OK))
management_endpoint_span.end(end_time=_end_time_ns)
async def async_management_endpoint_failure_hook(
self,
logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None,
):
from opentelemetry import trace
from datetime import datetime
from opentelemetry.trace import Status, StatusCode
_start_time_ns = logging_payload.start_time
_end_time_ns = logging_payload.end_time
start_time = logging_payload.start_time
end_time = logging_payload.end_time
if isinstance(start_time, float):
_start_time_ns = int(start_time * 1e9)
else:
_start_time_ns = self._to_ns(start_time)
if isinstance(end_time, float):
_end_time_ns = int(end_time * 1e9)
else:
_end_time_ns = self._to_ns(end_time)
if parent_otel_span is not None:
_span_name = logging_payload.route
management_endpoint_span = self.tracer.start_span(
name=_span_name,
context=trace.set_span_in_context(parent_otel_span),
start_time=_start_time_ns,
)
_request_data = logging_payload.request_data
if _request_data is not None:
for key, value in _request_data.items():
management_endpoint_span.set_attribute(f"request.{key}", value)
_exception = logging_payload.exception
management_endpoint_span.set_attribute(f"exception", str(_exception))
management_endpoint_span.set_status(Status(StatusCode.ERROR))
management_endpoint_span.end(end_time=_end_time_ns)
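Both hooks above normalize either a `float` of epoch seconds or a `datetime` into the nanosecond timestamps OpenTelemetry spans expect. A minimal sketch of that conversion, assuming naive datetimes are UTC (the helper name is illustrative):

```python
from datetime import datetime, timezone


def to_epoch_ns(value) -> int:
    """Convert epoch seconds (float) or a datetime into integer epoch nanoseconds."""
    if isinstance(value, datetime):
        # Treat naive datetimes as UTC; aware datetimes keep their own offset.
        if value.tzinfo is None:
            value = value.replace(tzinfo=timezone.utc)
        return int(value.timestamp() * 1e9)
    # Multiply before truncating to int so sub-second precision is preserved.
    return int(float(value) * 1e9)
```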

View file

@ -36,6 +36,9 @@ from ..types.llms.openai import (
AsyncAssistantStreamManager, AsyncAssistantStreamManager,
AssistantStreamManager, AssistantStreamManager,
) )
from litellm.caching import DualCache
azure_ad_cache = DualCache()
class AzureOpenAIError(Exception): class AzureOpenAIError(Exception):
@ -309,9 +312,10 @@ def select_azure_base_url_or_endpoint(azure_client_params: dict):
def get_azure_ad_token_from_oidc(azure_ad_token: str): def get_azure_ad_token_from_oidc(azure_ad_token: str):
azure_client_id = os.getenv("AZURE_CLIENT_ID", None) azure_client_id = os.getenv("AZURE_CLIENT_ID", None)
azure_tenant = os.getenv("AZURE_TENANT_ID", None) azure_tenant_id = os.getenv("AZURE_TENANT_ID", None)
azure_authority_host = os.getenv("AZURE_AUTHORITY_HOST", "https://login.microsoftonline.com")
if azure_client_id is None or azure_tenant is None: if azure_client_id is None or azure_tenant_id is None:
raise AzureOpenAIError( raise AzureOpenAIError(
status_code=422, status_code=422,
message="AZURE_CLIENT_ID and AZURE_TENANT_ID must be set", message="AZURE_CLIENT_ID and AZURE_TENANT_ID must be set",
@ -325,8 +329,19 @@ def get_azure_ad_token_from_oidc(azure_ad_token: str):
message="OIDC token could not be retrieved from secret manager.", message="OIDC token could not be retrieved from secret manager.",
) )
azure_ad_token_cache_key = json.dumps({
"azure_client_id": azure_client_id,
"azure_tenant_id": azure_tenant_id,
"azure_authority_host": azure_authority_host,
"oidc_token": oidc_token,
})
azure_ad_token_access_token = azure_ad_cache.get_cache(azure_ad_token_cache_key)
if azure_ad_token_access_token is not None:
return azure_ad_token_access_token
req_token = httpx.post( req_token = httpx.post(
f"https://login.microsoftonline.com/{azure_tenant}/oauth2/v2.0/token", f"{azure_authority_host}/{azure_tenant_id}/oauth2/v2.0/token",
data={ data={
"client_id": azure_client_id, "client_id": azure_client_id,
"grant_type": "client_credentials", "grant_type": "client_credentials",
@ -342,12 +357,23 @@ def get_azure_ad_token_from_oidc(azure_ad_token: str):
message=req_token.text, message=req_token.text,
) )
possible_azure_ad_token = req_token.json().get("access_token", None) azure_ad_token_json = req_token.json()
azure_ad_token_access_token = azure_ad_token_json.get("access_token", None)
azure_ad_token_expires_in = azure_ad_token_json.get("expires_in", None)
if possible_azure_ad_token is None: if azure_ad_token_access_token is None:
raise AzureOpenAIError(status_code=422, message="Azure AD Token not returned") raise AzureOpenAIError(
status_code=422, message="Azure AD Token access_token not returned"
)
return possible_azure_ad_token if azure_ad_token_expires_in is None:
raise AzureOpenAIError(
status_code=422, message="Azure AD Token expires_in not returned"
)
azure_ad_cache.set_cache(key=azure_ad_token_cache_key, value=azure_ad_token_access_token, ttl=azure_ad_token_expires_in)
return azure_ad_token_access_token
class AzureChatCompletion(BaseLLM): class AzureChatCompletion(BaseLLM):
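The change above caches the exchanged Azure AD access token, keyed by every input that influences it, for exactly the `expires_in` the token endpoint reports. The same pattern can be sketched without litellm's `DualCache`; here a plain dict stands in for it, and the actual token request is left to a caller-supplied function since the full token-endpoint payload is outside this sketch:

```python
import json
import time
from typing import Callable, Dict, Tuple

# (access_token, expiry measured on the monotonic clock), keyed by the request inputs.
_azure_ad_token_cache: Dict[str, Tuple[str, float]] = {}


def get_azure_ad_token_cached(
    azure_client_id: str,
    azure_tenant_id: str,
    azure_authority_host: str,
    oidc_token: str,
    fetch_token: Callable[[], dict],  # returns {"access_token": ..., "expires_in": ...}
) -> str:
    cache_key = json.dumps(
        {
            "azure_client_id": azure_client_id,
            "azure_tenant_id": azure_tenant_id,
            "azure_authority_host": azure_authority_host,
            "oidc_token": oidc_token,
        },
        sort_keys=True,
    )
    hit = _azure_ad_token_cache.get(cache_key)
    if hit is not None and hit[1] > time.monotonic():
        return hit[0]  # still inside the expires_in window, reuse it

    token_json = fetch_token()  # e.g. POST {authority_host}/{tenant_id}/oauth2/v2.0/token
    access_token = token_json["access_token"]
    # Cache for the lifetime Azure reports so later calls skip the token endpoint.
    _azure_ad_token_cache[cache_key] = (
        access_token,
        time.monotonic() + int(token_json["expires_in"]),
    )
    return access_token
```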

View file

@ -51,8 +51,11 @@ from litellm.types.llms.openai import (
ChatCompletionResponseMessage, ChatCompletionResponseMessage,
ChatCompletionToolCallChunk, ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk, ChatCompletionToolCallFunctionChunk,
ChatCompletionDeltaChunk,
) )
from litellm.caching import DualCache
iam_cache = DualCache()
class AmazonCohereChatConfig: class AmazonCohereChatConfig:
""" """
@ -324,38 +327,53 @@ class BedrockLLM(BaseLLM):
) = params_to_check ) = params_to_check
### CHECK STS ### ### CHECK STS ###
if ( if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
aws_web_identity_token is not None iam_creds_cache_key = json.dumps({
and aws_role_name is not None "aws_web_identity_token": aws_web_identity_token,
and aws_session_name is not None "aws_role_name": aws_role_name,
): "aws_session_name": aws_session_name,
oidc_token = get_secret(aws_web_identity_token) "aws_region_name": aws_region_name,
})
if oidc_token is None: iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
raise BedrockError( if iam_creds_dict is None:
message="OIDC token could not be retrieved from secret manager.", oidc_token = get_secret(aws_web_identity_token)
status_code=401,
if oidc_token is None:
raise BedrockError(
message="OIDC token could not be retrieved from secret manager.",
status_code=401,
)
sts_client = boto3.client(
"sts",
region_name=aws_region_name,
endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com"
) )
sts_client = boto3.client("sts") # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
sts_response = sts_client.assume_role_with_web_identity(
RoleArn=aws_role_name,
RoleSessionName=aws_session_name,
WebIdentityToken=oidc_token,
DurationSeconds=3600,
)
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html iam_creds_dict = {
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
sts_response = sts_client.assume_role_with_web_identity( "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
RoleArn=aws_role_name, "aws_session_token": sts_response["Credentials"]["SessionToken"],
RoleSessionName=aws_session_name, "region_name": aws_region_name,
WebIdentityToken=oidc_token, }
DurationSeconds=3600,
)
session = boto3.Session( iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)
aws_access_key_id=sts_response["Credentials"]["AccessKeyId"],
aws_secret_access_key=sts_response["Credentials"]["SecretAccessKey"],
aws_session_token=sts_response["Credentials"]["SessionToken"],
region_name=aws_region_name,
)
return session.get_credentials() session = boto3.Session(**iam_creds_dict)
iam_creds = session.get_credentials()
return iam_creds
elif aws_role_name is not None and aws_session_name is not None: elif aws_role_name is not None and aws_session_name is not None:
sts_client = boto3.client( sts_client = boto3.client(
"sts", "sts",
@ -1415,38 +1433,53 @@ class BedrockConverseLLM(BaseLLM):
) = params_to_check ) = params_to_check
### CHECK STS ### ### CHECK STS ###
if ( if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
aws_web_identity_token is not None iam_creds_cache_key = json.dumps({
and aws_role_name is not None "aws_web_identity_token": aws_web_identity_token,
and aws_session_name is not None "aws_role_name": aws_role_name,
): "aws_session_name": aws_session_name,
oidc_token = get_secret(aws_web_identity_token) "aws_region_name": aws_region_name,
})
if oidc_token is None: iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
raise BedrockError( if iam_creds_dict is None:
message="OIDC token could not be retrieved from secret manager.", oidc_token = get_secret(aws_web_identity_token)
status_code=401,
if oidc_token is None:
raise BedrockError(
message="OIDC token could not be retrieved from secret manager.",
status_code=401,
)
sts_client = boto3.client(
"sts",
region_name=aws_region_name,
endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com"
) )
sts_client = boto3.client("sts") # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
sts_response = sts_client.assume_role_with_web_identity(
RoleArn=aws_role_name,
RoleSessionName=aws_session_name,
WebIdentityToken=oidc_token,
DurationSeconds=3600,
)
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html iam_creds_dict = {
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
sts_response = sts_client.assume_role_with_web_identity( "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
RoleArn=aws_role_name, "aws_session_token": sts_response["Credentials"]["SessionToken"],
RoleSessionName=aws_session_name, "region_name": aws_region_name,
WebIdentityToken=oidc_token, }
DurationSeconds=3600,
)
session = boto3.Session( iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)
aws_access_key_id=sts_response["Credentials"]["AccessKeyId"],
aws_secret_access_key=sts_response["Credentials"]["SecretAccessKey"],
aws_session_token=sts_response["Credentials"]["SessionToken"],
region_name=aws_region_name,
)
return session.get_credentials() session = boto3.Session(**iam_creds_dict)
iam_creds = session.get_credentials()
return iam_creds
elif aws_role_name is not None and aws_session_name is not None: elif aws_role_name is not None and aws_session_name is not None:
sts_client = boto3.client( sts_client = boto3.client(
"sts", "sts",
@ -1859,29 +1892,59 @@ class AWSEventStreamDecoder:
self.parser = EventStreamJSONParser() self.parser = EventStreamJSONParser()
def converse_chunk_parser(self, chunk_data: dict) -> GenericStreamingChunk: def converse_chunk_parser(self, chunk_data: dict) -> GenericStreamingChunk:
text = "" try:
tool_str = "" text = ""
is_finished = False tool_use: Optional[ChatCompletionToolCallChunk] = None
finish_reason = "" is_finished = False
usage: Optional[ConverseTokenUsageBlock] = None finish_reason = ""
if "delta" in chunk_data: usage: Optional[ConverseTokenUsageBlock] = None
delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
if "text" in delta_obj: index = int(chunk_data.get("contentBlockIndex", 0))
text = delta_obj["text"] if "start" in chunk_data:
elif "toolUse" in delta_obj: start_obj = ContentBlockStartEvent(**chunk_data["start"])
tool_str = delta_obj["toolUse"]["input"] if (
elif "stopReason" in chunk_data: start_obj is not None
finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop")) and "toolUse" in start_obj
elif "usage" in chunk_data: and start_obj["toolUse"] is not None
usage = ConverseTokenUsageBlock(**chunk_data["usage"]) # type: ignore ):
response = GenericStreamingChunk( tool_use = {
text=text, "id": start_obj["toolUse"]["toolUseId"],
tool_str=tool_str, "type": "function",
is_finished=is_finished, "function": {
finish_reason=finish_reason, "name": start_obj["toolUse"]["name"],
usage=usage, "arguments": "",
) },
return response }
elif "delta" in chunk_data:
delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
if "text" in delta_obj:
text = delta_obj["text"]
elif "toolUse" in delta_obj:
tool_use = {
"id": None,
"type": "function",
"function": {
"name": None,
"arguments": delta_obj["toolUse"]["input"],
},
}
elif "stopReason" in chunk_data:
finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
is_finished = True
elif "usage" in chunk_data:
usage = ConverseTokenUsageBlock(**chunk_data["usage"]) # type: ignore
response = GenericStreamingChunk(
text=text,
tool_use=tool_use,
is_finished=is_finished,
finish_reason=finish_reason,
usage=usage,
index=index,
)
return response
except Exception as e:
raise Exception("Received streaming error - {}".format(str(e)))
def _chunk_parser(self, chunk_data: dict) -> GenericStreamingChunk: def _chunk_parser(self, chunk_data: dict) -> GenericStreamingChunk:
text = "" text = ""
@ -1890,12 +1953,16 @@ class AWSEventStreamDecoder:
if "outputText" in chunk_data: if "outputText" in chunk_data:
text = chunk_data["outputText"] text = chunk_data["outputText"]
# ai21 mapping # ai21 mapping
if "ai21" in self.model: # fake ai21 streaming elif "ai21" in self.model: # fake ai21 streaming
text = chunk_data.get("completions")[0].get("data").get("text") # type: ignore text = chunk_data.get("completions")[0].get("data").get("text") # type: ignore
is_finished = True is_finished = True
finish_reason = "stop" finish_reason = "stop"
######## bedrock.anthropic mappings ############### ######## bedrock.anthropic mappings ###############
elif "delta" in chunk_data: elif (
"contentBlockIndex" in chunk_data
or "stopReason" in chunk_data
or "metrics" in chunk_data
):
return self.converse_chunk_parser(chunk_data=chunk_data) return self.converse_chunk_parser(chunk_data=chunk_data)
######## bedrock.mistral mappings ############### ######## bedrock.mistral mappings ###############
elif "outputs" in chunk_data: elif "outputs" in chunk_data:
@ -1905,7 +1972,7 @@ class AWSEventStreamDecoder:
): ):
text = chunk_data["outputs"][0]["text"] text = chunk_data["outputs"][0]["text"]
stop_reason = chunk_data.get("stop_reason", None) stop_reason = chunk_data.get("stop_reason", None)
if stop_reason != None: if stop_reason is not None:
is_finished = True is_finished = True
finish_reason = stop_reason finish_reason = stop_reason
######## bedrock.cohere mappings ############### ######## bedrock.cohere mappings ###############
@ -1926,8 +1993,9 @@ class AWSEventStreamDecoder:
text=text, text=text,
is_finished=is_finished, is_finished=is_finished,
finish_reason=finish_reason, finish_reason=finish_reason,
tool_str="",
usage=None, usage=None,
index=0,
tool_use=None,
) )
def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[GenericStreamingChunk]: def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[GenericStreamingChunk]:
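As a rough illustration of the mapping the parser above performs, the sketch below turns a Bedrock Converse `contentBlockStart` or `contentBlockDelta` event into an OpenAI-style streaming tool-call delta. Field names follow the chunks shown above; the return shape is deliberately simplified and is not litellm's `GenericStreamingChunk`:

```python
from typing import Optional


def converse_event_to_tool_call(chunk_data: dict) -> Optional[dict]:
    """Map one Converse stream event to an OpenAI-style tool_call delta, or None."""
    index = int(chunk_data.get("contentBlockIndex", 0))
    if "start" in chunk_data and "toolUse" in chunk_data["start"]:
        tool = chunk_data["start"]["toolUse"]
        # First event of a tool call: carries the id and function name, no arguments yet.
        return {
            "index": index,
            "id": tool["toolUseId"],
            "type": "function",
            "function": {"name": tool["name"], "arguments": ""},
        }
    if "delta" in chunk_data and "toolUse" in chunk_data["delta"]:
        # Later events stream the JSON arguments incrementally.
        return {
            "index": index,
            "id": None,
            "type": "function",
            "function": {"name": None, "arguments": chunk_data["delta"]["toolUse"]["input"]},
        }
    return None  # text, stopReason and usage events are handled separately
```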

View file

@ -139,6 +139,7 @@ def process_response(
def convert_model_to_url(model: str, api_base: str): def convert_model_to_url(model: str, api_base: str):
user_id, app_id, model_id = model.split(".") user_id, app_id, model_id = model.split(".")
model_id = model_id.lower()
return f"{api_base}/users/{user_id}/apps/{app_id}/models/{model_id}/outputs" return f"{api_base}/users/{user_id}/apps/{app_id}/models/{model_id}/outputs"
@ -171,19 +172,55 @@ async def async_completion(
async_handler = AsyncHTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0)) async_handler = AsyncHTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
response = await async_handler.post( response = await async_handler.post(
api_base, headers=headers, data=json.dumps(data) url=model, headers=headers, data=json.dumps(data)
) )
return process_response( logging_obj.post_call(
model=model, input=prompt,
prompt=prompt,
response=response,
model_response=model_response,
api_key=api_key, api_key=api_key,
data=data, original_response=response.text,
encoding=encoding, additional_args={"complete_input_dict": data},
logging_obj=logging_obj,
) )
## RESPONSE OBJECT
try:
completion_response = response.json()
except Exception:
raise ClarifaiError(
message=response.text, status_code=response.status_code, url=model
)
# print(completion_response)
try:
choices_list = []
for idx, item in enumerate(completion_response["outputs"]):
if len(item["data"]["text"]["raw"]) > 0:
message_obj = Message(content=item["data"]["text"]["raw"])
else:
message_obj = Message(content=None)
choice_obj = Choices(
finish_reason="stop",
index=idx + 1, # check
message=message_obj,
)
choices_list.append(choice_obj)
model_response["choices"] = choices_list
except Exception as e:
raise ClarifaiError(
message=traceback.format_exc(), status_code=response.status_code, url=model
)
# Calculate Usage
prompt_tokens = len(encoding.encode(prompt))
completion_tokens = len(
encoding.encode(model_response["choices"][0]["message"].get("content") or "")
)
model_response["model"] = model
model_response["usage"] = Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
)
return model_response
def completion( def completion(
@ -241,7 +278,7 @@ def completion(
additional_args={ additional_args={
"complete_input_dict": data, "complete_input_dict": data,
"headers": headers, "headers": headers,
"api_base": api_base, "api_base": model,
}, },
) )
if acompletion == True: if acompletion == True:
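The async path above now assembles the OpenAI-style response by hand from Clarifai's `outputs` list. A dependency-free sketch of that mapping, with the tokenizer passed in by the caller and the dict shapes simplified for illustration:

```python
from typing import Callable, List


def clarifai_outputs_to_response(
    completion_response: dict,
    prompt: str,
    encode: Callable[[str], List[int]],  # e.g. a tiktoken encoding's .encode
) -> dict:
    choices = []
    for idx, item in enumerate(completion_response.get("outputs", [])):
        raw_text = item["data"]["text"]["raw"]
        choices.append(
            {
                "finish_reason": "stop",
                "index": idx + 1,
                "message": {"role": "assistant", "content": raw_text or None},
            }
        )

    prompt_tokens = len(encode(prompt))
    first_content = choices[0]["message"]["content"] if choices else None
    completion_tokens = len(encode(first_content or ""))
    return {
        "choices": choices,
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    }
```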

View file

@ -164,6 +164,49 @@ class MistralConfig:
return optional_params return optional_params
class MistralEmbeddingConfig:
"""
Reference: https://docs.mistral.ai/api/#operation/createEmbedding
"""
def __init__(
self,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self):
return [
"encoding_format",
]
def map_openai_params(self, non_default_params: dict, optional_params: dict):
for param, value in non_default_params.items():
if param == "encoding_format":
optional_params["encoding_format"] = value
return optional_params
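The new config follows the usual provider-config pattern in this codebase: declare which OpenAI-style parameters the provider understands, then copy only those onto the outgoing request. A standalone sketch of that pattern (the class name is illustrative, not a litellm internal):

```python
class MinimalEmbeddingParamMapper:
    # Only these OpenAI-style parameters are forwarded to the provider.
    SUPPORTED_OPENAI_PARAMS = ("encoding_format",)

    def map_openai_params(self, non_default_params: dict, optional_params: dict) -> dict:
        for param, value in non_default_params.items():
            if param in self.SUPPORTED_OPENAI_PARAMS:
                optional_params[param] = value
        return optional_params


# Unsupported keys are dropped, supported ones pass through.
print(MinimalEmbeddingParamMapper().map_openai_params(
    {"encoding_format": "float", "user": "abc"}, {}
))  # -> {'encoding_format': 'float'}
```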
class DeepInfraConfig: class DeepInfraConfig:
""" """
Reference: https://deepinfra.com/docs/advanced/openai_api Reference: https://deepinfra.com/docs/advanced/openai_api

File diff suppressed because it is too large

View file

@ -4,6 +4,7 @@ from enum import Enum
import requests # type: ignore import requests # type: ignore
import time import time
from typing import Callable, Optional, Union, List, Literal, Any from typing import Callable, Optional, Union, List, Literal, Any
from pydantic import BaseModel
from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
import litellm, uuid import litellm, uuid
import httpx, inspect # type: ignore import httpx, inspect # type: ignore
@ -12,7 +13,12 @@ from litellm.llms.prompt_templates.factory import (
convert_to_gemini_tool_call_result, convert_to_gemini_tool_call_result,
convert_to_gemini_tool_call_invoke, convert_to_gemini_tool_call_invoke,
) )
from litellm.types.files import get_file_mime_type_for_file_type, get_file_type_from_extension, is_gemini_1_5_accepted_file_type, is_video_file_type from litellm.types.files import (
get_file_mime_type_for_file_type,
get_file_type_from_extension,
is_gemini_1_5_accepted_file_type,
is_video_file_type,
)
class VertexAIError(Exception): class VertexAIError(Exception):
@ -301,15 +307,15 @@ def _process_gemini_image(image_url: str) -> PartType:
# GCS URIs # GCS URIs
if "gs://" in image_url: if "gs://" in image_url:
# Figure out file type # Figure out file type
extension_with_dot = os.path.splitext(image_url)[-1] # Ex: ".png" extension_with_dot = os.path.splitext(image_url)[-1] # Ex: ".png"
extension = extension_with_dot[1:] # Ex: "png" extension = extension_with_dot[1:] # Ex: "png"
file_type = get_file_type_from_extension(extension) file_type = get_file_type_from_extension(extension)
# Validate the file type is supported by Gemini # Validate the file type is supported by Gemini
if not is_gemini_1_5_accepted_file_type(file_type): if not is_gemini_1_5_accepted_file_type(file_type):
raise Exception(f"File type not supported by gemini - {file_type}") raise Exception(f"File type not supported by gemini - {file_type}")
mime_type = get_file_mime_type_for_file_type(file_type) mime_type = get_file_mime_type_for_file_type(file_type)
file_data = FileDataType(mime_type=mime_type, file_uri=image_url) file_data = FileDataType(mime_type=mime_type, file_uri=image_url)
@ -320,7 +326,7 @@ def _process_gemini_image(image_url: str) -> PartType:
image = _load_image_from_url(image_url) image = _load_image_from_url(image_url)
_blob = BlobType(data=image.data, mime_type=image._mime_type) _blob = BlobType(data=image.data, mime_type=image._mime_type)
return PartType(inline_data=_blob) return PartType(inline_data=_blob)
# Base64 encoding # Base64 encoding
elif "base64" in image_url: elif "base64" in image_url:
import base64, re import base64, re
@ -1293,6 +1299,95 @@ async def async_streaming(
return streamwrapper return streamwrapper
class VertexAITextEmbeddingConfig(BaseModel):
"""
Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#TextEmbeddingInput
Args:
auto_truncate: Optional(bool) If True, will truncate input text to fit within the model's max input length.
task_type: Optional(str) The type of task to be performed. The default is "RETRIEVAL_QUERY".
title: Optional(str) The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
"""
auto_truncate: Optional[bool] = None
task_type: Optional[
Literal[
"RETRIEVAL_QUERY",
"RETRIEVAL_DOCUMENT",
"SEMANTIC_SIMILARITY",
"CLASSIFICATION",
"CLUSTERING",
"QUESTION_ANSWERING",
"FACT_VERIFICATION",
]
] = None
title: Optional[str] = None
def __init__(
self,
auto_truncate: Optional[bool] = None,
task_type: Optional[
Literal[
"RETRIEVAL_QUERY",
"RETRIEVAL_DOCUMENT",
"SEMANTIC_SIMILARITY",
"CLASSIFICATION",
"CLUSTERING",
"QUESTION_ANSWERING",
"FACT_VERIFICATION",
]
] = None,
title: Optional[str] = None,
) -> None:
locals_ = locals()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self):
return [
"dimensions",
]
def map_openai_params(self, non_default_params: dict, optional_params: dict):
for param, value in non_default_params.items():
if param == "dimensions":
optional_params["output_dimensionality"] = value
return optional_params
def get_mapped_special_auth_params(self) -> dict:
"""
Common auth params across bedrock/vertex_ai/azure/watsonx
"""
return {"project": "vertex_project", "region_name": "vertex_location"}
def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
mapped_params = self.get_mapped_special_auth_params()
for param, value in non_default_params.items():
if param in mapped_params:
optional_params[mapped_params[param]] = value
return optional_params
def embedding( def embedding(
model: str, model: str,
input: Union[list, str], input: Union[list, str],
@ -1316,7 +1411,7 @@ def embedding(
message="vertexai import failed please run `pip install google-cloud-aiplatform`", message="vertexai import failed please run `pip install google-cloud-aiplatform`",
) )
from vertexai.language_models import TextEmbeddingModel from vertexai.language_models import TextEmbeddingModel, TextEmbeddingInput
import google.auth # type: ignore import google.auth # type: ignore
## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744 ## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
@ -1347,6 +1442,16 @@ def embedding(
if isinstance(input, str): if isinstance(input, str):
input = [input] input = [input]
if optional_params is not None and isinstance(optional_params, dict):
if optional_params.get("task_type") or optional_params.get("title"):
# if user passed task_type or title, cast to TextEmbeddingInput
_task_type = optional_params.pop("task_type", None)
_title = optional_params.pop("title", None)
input = [
TextEmbeddingInput(text=x, task_type=_task_type, title=_title)
for x in input
]
try: try:
llm_model = TextEmbeddingModel.from_pretrained(model) llm_model = TextEmbeddingModel.from_pretrained(model)
except Exception as e: except Exception as e:
@ -1363,7 +1468,8 @@ def embedding(
encoding=encoding, encoding=encoding,
) )
request_str = f"""embeddings = llm_model.get_embeddings({input})""" _input_dict = {"texts": input, **optional_params}
request_str = f"""embeddings = llm_model.get_embeddings({_input_dict})"""
## LOGGING PRE-CALL ## LOGGING PRE-CALL
logging_obj.pre_call( logging_obj.pre_call(
input=input, input=input,
@ -1375,7 +1481,7 @@ def embedding(
) )
try: try:
embeddings = llm_model.get_embeddings(input) embeddings = llm_model.get_embeddings(**_input_dict)
except Exception as e: except Exception as e:
raise VertexAIError(status_code=500, message=str(e)) raise VertexAIError(status_code=500, message=str(e))
@ -1383,6 +1489,7 @@ def embedding(
logging_obj.post_call(input=input, api_key=None, original_response=embeddings) logging_obj.post_call(input=input, api_key=None, original_response=embeddings)
## Populate OpenAI compliant dictionary ## Populate OpenAI compliant dictionary
embedding_response = [] embedding_response = []
input_tokens: int = 0
for idx, embedding in enumerate(embeddings): for idx, embedding in enumerate(embeddings):
embedding_response.append( embedding_response.append(
{ {
@ -1391,14 +1498,10 @@ def embedding(
"embedding": embedding.values, "embedding": embedding.values,
} }
) )
input_tokens += embedding.statistics.token_count
model_response["object"] = "list" model_response["object"] = "list"
model_response["data"] = embedding_response model_response["data"] = embedding_response
model_response["model"] = model model_response["model"] = model
input_tokens = 0
input_str = "".join(input)
input_tokens += len(encoding.encode(input_str))
usage = Usage( usage = Usage(
prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
@ -1420,7 +1523,8 @@ async def async_embedding(
""" """
Async embedding implementation Async embedding implementation
""" """
request_str = f"""embeddings = llm_model.get_embeddings({input})""" _input_dict = {"texts": input, **optional_params}
request_str = f"""embeddings = llm_model.get_embeddings({_input_dict})"""
## LOGGING PRE-CALL ## LOGGING PRE-CALL
logging_obj.pre_call( logging_obj.pre_call(
input=input, input=input,
@ -1432,7 +1536,7 @@ async def async_embedding(
) )
try: try:
embeddings = await client.get_embeddings_async(input) embeddings = await client.get_embeddings_async(**_input_dict)
except Exception as e: except Exception as e:
raise VertexAIError(status_code=500, message=str(e)) raise VertexAIError(status_code=500, message=str(e))
@ -1440,6 +1544,7 @@ async def async_embedding(
logging_obj.post_call(input=input, api_key=None, original_response=embeddings) logging_obj.post_call(input=input, api_key=None, original_response=embeddings)
## Populate OpenAI compliant dictionary ## Populate OpenAI compliant dictionary
embedding_response = [] embedding_response = []
input_tokens: int = 0
for idx, embedding in enumerate(embeddings): for idx, embedding in enumerate(embeddings):
embedding_response.append( embedding_response.append(
{ {
@ -1448,18 +1553,13 @@ async def async_embedding(
"embedding": embedding.values, "embedding": embedding.values,
} }
) )
input_tokens += embedding.statistics.token_count
model_response["object"] = "list" model_response["object"] = "list"
model_response["data"] = embedding_response model_response["data"] = embedding_response
model_response["model"] = model model_response["model"] = model
input_tokens = 0
input_str = "".join(input)
input_tokens += len(encoding.encode(input_str))
usage = Usage( usage = Usage(
prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
) )
model_response.usage = usage model_response.usage = usage
return model_response return model_response
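A compact sketch of how the embedding path above is now driven, assuming `google-cloud-aiplatform` is installed, credentials are configured, and the SDK symbols behave as used in the patch (`TextEmbeddingInput`, `get_embeddings(texts=...)`, `embedding.values`, `embedding.statistics.token_count`):

```python
from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel


def embed_with_task_type(model_name: str, texts: list, task_type: str = "RETRIEVAL_QUERY"):
    llm_model = TextEmbeddingModel.from_pretrained(model_name)
    # Wrap each string so the task type reaches the API
    # (title is only valid together with task_type="RETRIEVAL_DOCUMENT").
    inputs = [TextEmbeddingInput(text=t, task_type=task_type) for t in texts]
    embeddings = llm_model.get_embeddings(texts=inputs)

    vectors = [e.values for e in embeddings]
    # Billable prompt tokens come from the API's own statistics, not a local tokenizer.
    prompt_tokens = sum(e.statistics.token_count for e in embeddings)
    return vectors, prompt_tokens
```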

View file

@ -11,10 +11,10 @@ import os, openai, sys, json, inspect, uuid, datetime, threading
from typing import Any, Literal, Union, BinaryIO from typing import Any, Literal, Union, BinaryIO
from typing_extensions import overload from typing_extensions import overload
from functools import partial from functools import partial
import dotenv, traceback, random, asyncio, time, contextvars import dotenv, traceback, random, asyncio, time, contextvars
from copy import deepcopy from copy import deepcopy
import httpx import httpx
import litellm import litellm
from ._logging import verbose_logger from ._logging import verbose_logger
from litellm import ( # type: ignore from litellm import ( # type: ignore
@ -335,6 +335,7 @@ async def acompletion(
or custom_llm_provider == "predibase" or custom_llm_provider == "predibase"
or custom_llm_provider == "bedrock" or custom_llm_provider == "bedrock"
or custom_llm_provider == "databricks" or custom_llm_provider == "databricks"
or custom_llm_provider == "clarifai"
or custom_llm_provider in litellm.openai_compatible_providers or custom_llm_provider in litellm.openai_compatible_providers
): # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all. ): # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all.
init_response = await loop.run_in_executor(None, func_with_context) init_response = await loop.run_in_executor(None, func_with_context)

View file

@ -1387,6 +1387,26 @@
"mode": "image_generation", "mode": "image_generation",
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
}, },
"text-embedding-004": {
"max_tokens": 3072,
"max_input_tokens": 3072,
"output_vector_size": 768,
"input_cost_per_token": 0.00000000625,
"output_cost_per_token": 0,
"litellm_provider": "vertex_ai-embedding-models",
"mode": "embedding",
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
},
"text-multilingual-embedding-002": {
"max_tokens": 2048,
"max_input_tokens": 2048,
"output_vector_size": 768,
"input_cost_per_token": 0.00000000625,
"output_cost_per_token": 0,
"litellm_provider": "vertex_ai-embedding-models",
"mode": "embedding",
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
},
"textembedding-gecko": { "textembedding-gecko": {
"max_tokens": 3072, "max_tokens": 3072,
"max_input_tokens": 3072, "max_input_tokens": 3072,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[45980,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-17b0c91edd3a24fe.js\",\"931\",\"static/chunks/app/page-d61796ff0d3a8faf.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"tghLG7_IS7i5OkQJRvCIl\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin="" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[45980,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-17b0c91edd3a24fe.js\",\"931\",\"static/chunks/app/page-bd882aee817406ff.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"48nWsJi-LJrUlOLzcK-Yz\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, 
initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[45980,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-17b0c91edd3a24fe.js","931","static/chunks/app/page-d61796ff0d3a8faf.js"],""] 3:I[45980,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-17b0c91edd3a24fe.js","931","static/chunks/app/page-bd882aee817406ff.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -2,6 +2,6 @@
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-17b0c91edd3a24fe.js","418","static/chunks/app/model_hub/page-4cb65c32467214b5.js"],""] 3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-17b0c91edd3a24fe.js","418","static/chunks/app/model_hub/page-4cb65c32467214b5.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -2,6 +2,6 @@
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-17b0c91edd3a24fe.js","461","static/chunks/app/onboarding/page-664c7288e11fff5a.js"],""] 3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-17b0c91edd3a24fe.js","461","static/chunks/app/onboarding/page-664c7288e11fff5a.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -1,7 +1,12 @@
import json import json
import logging import logging
from logging import Formatter from logging import Formatter
import sys import os
from litellm import json_logs
# Set default log level to INFO
log_level = os.getenv("LITELLM_LOG", "INFO")
numeric_level: str = getattr(logging, log_level.upper())
class JsonFormatter(Formatter): class JsonFormatter(Formatter):
@ -16,6 +21,14 @@ class JsonFormatter(Formatter):
logger = logging.root logger = logging.root
handler = logging.StreamHandler() handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter()) if json_logs:
handler.setFormatter(JsonFormatter())
else:
formatter = logging.Formatter(
"\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s",
datefmt="%H:%M:%S",
)
handler.setFormatter(formatter)
logger.handlers = [handler] logger.handlers = [handler]
logger.setLevel(logging.INFO) logger.setLevel(numeric_level)
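For context, a minimal standalone sketch of the level-selection behaviour introduced above: the level name comes from the `LITELLM_LOG` env var and falls back to INFO. This is an illustrative equivalent, not litellm's own module.

```python
import logging
import os

# Mirror the hunk above: resolve LITELLM_LOG ("DEBUG", "INFO", ...) to a numeric level.
log_level = os.getenv("LITELLM_LOG", "INFO")
numeric_level = getattr(logging, log_level.upper(), logging.INFO)

logging.basicConfig(
    level=numeric_level,
    format="%(asctime)s - %(name)s:%(levelname)s: %(filename)s:%(lineno)s - %(message)s",
    datefmt="%H:%M:%S",
)
logging.getLogger(__name__).debug("only emitted when LITELLM_LOG=DEBUG")
```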

View file

@ -719,6 +719,8 @@ class Member(LiteLLMBase):
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
def check_user_info(cls, values): def check_user_info(cls, values):
if not isinstance(values, dict):
raise ValueError("input needs to be a dictionary")
if values.get("user_id") is None and values.get("user_email") is None: if values.get("user_id") is None and values.get("user_email") is None:
raise ValueError("Either user id or user email must be provided") raise ValueError("Either user id or user email must be provided")
return values return values
@ -757,9 +759,24 @@ class GlobalEndUsersSpend(LiteLLMBase):
class TeamMemberAddRequest(LiteLLMBase): class TeamMemberAddRequest(LiteLLMBase):
team_id: str team_id: str
member: Member member: Union[List[Member], Member]
max_budget_in_team: Optional[float] = None # Users max budget within the team max_budget_in_team: Optional[float] = None # Users max budget within the team
def __init__(self, **data):
member_data = data.get("member")
if isinstance(member_data, list):
# If member is a list of dictionaries, convert each dictionary to a Member object
members = [Member(**item) for item in member_data]
# Replace member_data with the list of Member objects
data["member"] = members
elif isinstance(member_data, dict):
# If member is a dictionary, convert it to a single Member object
member = Member(**member_data)
# Replace member_data with the single Member object
data["member"] = member
# Call the superclass __init__ method to initialize the object
super().__init__(**data)
class TeamMemberDeleteRequest(LiteLLMBase): class TeamMemberDeleteRequest(LiteLLMBase):
team_id: str team_id: str
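A short sketch of the widened `member` field, which now accepts either a single member or a list; the `Member` field names used here (`role`, `user_id`, `user_email`) are inferred from how the model is used elsewhere in this diff and should be treated as assumptions.

```python
from litellm.proxy._types import Member, TeamMemberAddRequest

# Single-member form (unchanged behaviour).
single = TeamMemberAddRequest(
    team_id="team-1",
    member={"role": "user", "user_email": "alice@example.com"},
)

# New list form: each dict is coerced into a Member object in __init__.
batch = TeamMemberAddRequest(
    team_id="team-1",
    member=[
        {"role": "user", "user_id": "user-123"},
        {"role": "admin", "user_email": "bob@example.com"},
    ],
    max_budget_in_team=10.0,
)
assert isinstance(batch.member, list) and isinstance(batch.member[0], Member)
```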
@ -1472,6 +1489,9 @@ class SpendLogsMetadata(TypedDict):
user_api_key_team_id: Optional[str] user_api_key_team_id: Optional[str]
user_api_key_user_id: Optional[str] user_api_key_user_id: Optional[str]
user_api_key_team_alias: Optional[str] user_api_key_team_alias: Optional[str]
spend_logs_metadata: Optional[
dict
] # special param to log k,v pairs to spendlogs for a call
class SpendLogsPayload(TypedDict): class SpendLogsPayload(TypedDict):
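A hedged sketch of how the new `spend_logs_metadata` field could be populated from a client call through the proxy, assuming the request-level `metadata` dict is forwarded into the logging payload (as the `get_logging_payload` change further down suggests) and a local proxy with the `sk-1234` master key used in the example scripts below.

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="llama3",
    messages=[{"role": "user", "content": "hi"}],
    # k,v pairs under spend_logs_metadata end up in the spend log row for this call.
    extra_body={"metadata": {"spend_logs_metadata": {"job_id": "batch-42"}}},
)
print(response.id)
```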
@ -1496,3 +1516,60 @@ class SpendLogsPayload(TypedDict):
request_tags: str # json str request_tags: str # json str
team_id: Optional[str] team_id: Optional[str]
end_user: Optional[str] end_user: Optional[str]
class SpanAttributes(str, enum.Enum):
# Note: We've taken this from opentelemetry-semantic-conventions-ai
# I chose to not add a new dependency to litellm for this
# Semantic Conventions for LLM requests, this needs to be removed after
# OpenTelemetry Semantic Conventions support Gen AI.
# Issue at https://github.com/open-telemetry/opentelemetry-python/issues/3868
# Refer to https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/llm-spans.md
LLM_SYSTEM = "gen_ai.system"
LLM_REQUEST_MODEL = "gen_ai.request.model"
LLM_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
LLM_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
LLM_REQUEST_TOP_P = "gen_ai.request.top_p"
LLM_PROMPTS = "gen_ai.prompt"
LLM_COMPLETIONS = "gen_ai.completion"
LLM_RESPONSE_MODEL = "gen_ai.response.model"
LLM_USAGE_COMPLETION_TOKENS = "gen_ai.usage.completion_tokens"
LLM_USAGE_PROMPT_TOKENS = "gen_ai.usage.prompt_tokens"
LLM_TOKEN_TYPE = "gen_ai.token.type"
# To be added
# LLM_RESPONSE_FINISH_REASON = "gen_ai.response.finish_reasons"
# LLM_RESPONSE_ID = "gen_ai.response.id"
# LLM
LLM_REQUEST_TYPE = "llm.request.type"
LLM_USAGE_TOTAL_TOKENS = "llm.usage.total_tokens"
LLM_USAGE_TOKEN_TYPE = "llm.usage.token_type"
LLM_USER = "llm.user"
LLM_HEADERS = "llm.headers"
LLM_TOP_K = "llm.top_k"
LLM_IS_STREAMING = "llm.is_streaming"
LLM_FREQUENCY_PENALTY = "llm.frequency_penalty"
LLM_PRESENCE_PENALTY = "llm.presence_penalty"
LLM_CHAT_STOP_SEQUENCES = "llm.chat.stop_sequences"
LLM_REQUEST_FUNCTIONS = "llm.request.functions"
LLM_REQUEST_REPETITION_PENALTY = "llm.request.repetition_penalty"
LLM_RESPONSE_FINISH_REASON = "llm.response.finish_reason"
LLM_RESPONSE_STOP_REASON = "llm.response.stop_reason"
LLM_CONTENT_COMPLETION_CHUNK = "llm.content.completion.chunk"
# OpenAI
LLM_OPENAI_RESPONSE_SYSTEM_FINGERPRINT = "gen_ai.openai.system_fingerprint"
LLM_OPENAI_API_BASE = "gen_ai.openai.api_base"
LLM_OPENAI_API_VERSION = "gen_ai.openai.api_version"
LLM_OPENAI_API_TYPE = "gen_ai.openai.api_type"
class ManagementEndpointLoggingPayload(LiteLLMBase):
route: str
request_data: dict
response: Optional[dict] = None
exception: Optional[Any] = None
start_time: Optional[datetime] = None
end_time: Optional[datetime] = None
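Since `SpanAttributes` is a plain `str` enum, its members can be used directly as OpenTelemetry attribute keys. A small illustrative sketch (the tracer setup mirrors the test script further down; the attribute values are placeholders):

```python
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from litellm.proxy._types import SpanAttributes

trace.set_tracer_provider(TracerProvider())
tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("llm-request") as span:
    # str-enum members are valid attribute keys; .value gives the raw string.
    span.set_attribute(SpanAttributes.LLM_REQUEST_MODEL.value, "gpt-3.5-turbo")
    span.set_attribute(SpanAttributes.LLM_IS_STREAMING.value, False)
    span.set_attribute(SpanAttributes.LLM_USAGE_TOTAL_TOKENS.value, 42)
```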

View file

@ -151,8 +151,8 @@ def common_checks(
and route != "/models" and route != "/models"
): ):
if global_proxy_spend > litellm.max_budget: if global_proxy_spend > litellm.max_budget:
raise Exception( raise litellm.BudgetExceededError(
f"ExceededBudget: LiteLLM Proxy has exceeded its budget. Current spend: {global_proxy_spend}; Max Budget: {litellm.max_budget}" current_cost=global_proxy_spend, max_budget=litellm.max_budget
) )
return True return True
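The generic `Exception` is replaced with `litellm.BudgetExceededError`, which carries the spend and the budget as keyword arguments. A minimal sketch of raising and catching it (the message wording is what the updated tests below assert on):

```python
import litellm

try:
    raise litellm.BudgetExceededError(current_cost=12.5, max_budget=10.0)
except litellm.BudgetExceededError as e:
    # Tests further down check for "Budget has been exceeded" in this message.
    print(str(e))
```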

View file

@ -0,0 +1,31 @@
from typing import Optional
from fastapi import Request
import ast, json
async def _read_request_body(request: Optional[Request]) -> dict:
"""
Asynchronous function to read the request body and parse it as JSON or literal data.
Parameters:
- request: The request object to read the body from
Returns:
- dict: Parsed request data as a dictionary
"""
try:
request_data: dict = {}
if request is None:
return request_data
body = await request.body()
if body == b"" or body is None:
return request_data
body_str = body.decode()
try:
request_data = ast.literal_eval(body_str)
except:
request_data = json.loads(body_str)
return request_data
except:
return {}
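A hedged usage sketch of the relocated helper inside a FastAPI route; the app and route path here are illustrative only.

```python
from fastapi import FastAPI, Request

from litellm.proxy.common_utils.http_parsing_utils import _read_request_body

app = FastAPI()


@app.post("/echo")
async def echo(request: Request):
    # Returns {} for an empty or unparseable body, otherwise the parsed dict.
    data = await _read_request_body(request=request)
    return {"received": data}
```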

View file

@ -0,0 +1,90 @@
from datetime import datetime
from functools import wraps
from litellm.proxy._types import UserAPIKeyAuth, ManagementEndpointLoggingPayload
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from fastapi import Request
def management_endpoint_wrapper(func):
"""
This wrapper does the following:
1. Log I/O, Exceptions to OTEL
2. Create an Audit log for success calls
"""
@wraps(func)
async def wrapper(*args, **kwargs):
start_time = datetime.now()
try:
result = await func(*args, **kwargs)
end_time = datetime.now()
if kwargs is None:
kwargs = {}
user_api_key_dict: UserAPIKeyAuth = (
kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
)
parent_otel_span = user_api_key_dict.parent_otel_span
if parent_otel_span is not None:
from litellm.proxy.proxy_server import open_telemetry_logger
if open_telemetry_logger is not None:
_http_request: Request = kwargs.get("http_request")
_route = _http_request.url.path
_request_body: dict = await _read_request_body(
request=_http_request
)
_response = dict(result) if result is not None else None
logging_payload = ManagementEndpointLoggingPayload(
route=_route,
request_data=_request_body,
response=_response,
start_time=start_time,
end_time=end_time,
)
await open_telemetry_logger.async_management_endpoint_success_hook(
logging_payload=logging_payload,
parent_otel_span=parent_otel_span,
)
return result
except Exception as e:
end_time = datetime.now()
if kwargs is None:
kwargs = {}
user_api_key_dict: UserAPIKeyAuth = (
kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
)
parent_otel_span = user_api_key_dict.parent_otel_span
if parent_otel_span is not None:
from litellm.proxy.proxy_server import open_telemetry_logger
if open_telemetry_logger is not None:
_http_request: Request = kwargs.get("http_request")
_route = _http_request.url.path
_request_body: dict = await _read_request_body(
request=_http_request
)
logging_payload = ManagementEndpointLoggingPayload(
route=_route,
request_data=_request_body,
response=None,
start_time=start_time,
end_time=end_time,
exception=e,
)
await open_telemetry_logger.async_management_endpoint_failure_hook(
logging_payload=logging_payload,
parent_otel_span=parent_otel_span,
)
raise e
return wrapper
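A hedged sketch of how a management route picks up this wrapper. The explicit `http_request` parameter matters because the wrapper reads it out of `kwargs`; the route path below is illustrative.

```python
from fastapi import APIRouter, Request

from litellm.proxy.common_utils.management_endpoint_utils import (
    management_endpoint_wrapper,
)

router = APIRouter()


@router.post("/example/info")
@management_endpoint_wrapper
async def example_info(http_request: Request):
    # "http_request" is read via kwargs.get("http_request") when an OTEL parent
    # span exists; without one the call is simply passed through.
    return {"ok": True}
```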

View file

@ -79,10 +79,6 @@ async def add_litellm_data_to_request(
data["cache"][k] = v data["cache"][k] = v
verbose_proxy_logger.debug("receiving data: %s", data) verbose_proxy_logger.debug("receiving data: %s", data)
# users can pass in 'user' param to /chat/completions. Don't override it
if data.get("user", None) is None and user_api_key_dict.user_id is not None:
# if users are using user_api_key_auth, set `user` in `data`
data["user"] = user_api_key_dict.user_id
if "metadata" not in data: if "metadata" not in data:
data["metadata"] = {} data["metadata"] = {}

View file

@ -0,0 +1,63 @@
# What is this?
## Helper utils for the management endpoints (keys/users/teams)
from litellm.proxy._types import LiteLLM_TeamTable, Member, UserAPIKeyAuth
from litellm.proxy.utils import PrismaClient
import uuid
from typing import Optional
async def add_new_member(
new_member: Member,
max_budget_in_team: Optional[float],
prisma_client: PrismaClient,
team_id: str,
user_api_key_dict: UserAPIKeyAuth,
litellm_proxy_admin_name: str,
):
"""
Add a new member to a team
- add team id to user table
- add team member w/ budget to team member table
"""
## ADD TEAM ID, to USER TABLE IF NEW ##
if new_member.user_id is not None:
await prisma_client.db.litellm_usertable.update(
where={"user_id": new_member.user_id},
data={"teams": {"push": [team_id]}},
)
elif new_member.user_email is not None:
user_data = {"user_id": str(uuid.uuid4()), "user_email": new_member.user_email}
## user email is not unique acc. to prisma schema -> future improvement
### for now: check if it exists in db, if not - insert it
existing_user_row = await prisma_client.get_data(
key_val={"user_email": new_member.user_email},
table_name="user",
query_type="find_all",
)
if existing_user_row is None or (
isinstance(existing_user_row, list) and len(existing_user_row) == 0
):
await prisma_client.insert_data(data=user_data, table_name="user")
# Check if trying to set a budget for team member
if max_budget_in_team is not None and new_member.user_id is not None:
# create a new budget item for this member
response = await prisma_client.db.litellm_budgettable.create(
data={
"max_budget": max_budget_in_team,
"created_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
"updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
}
)
_budget_id = response.budget_id
await prisma_client.db.litellm_teammembership.create(
data={
"team_id": team_id,
"user_id": new_member.user_id,
"budget_id": _budget_id,
}
)
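A hedged sketch of invoking the helper, mirroring the call sites added to `team_member_add` later in this diff. Here `prisma_client` is assumed to be an initialized `PrismaClient`, and the `Member` field names and admin name are assumptions.

```python
from litellm.proxy._types import Member, UserAPIKeyAuth
from litellm.proxy.management_helpers.utils import add_new_member


async def add_alice(prisma_client):  # prisma_client: an initialized PrismaClient
    await add_new_member(
        new_member=Member(role="user", user_email="alice@example.com"),
        max_budget_in_team=25.0,
        prisma_client=prisma_client,
        team_id="team-1",
        user_api_key_dict=UserAPIKeyAuth(user_id="admin-1"),
        litellm_proxy_admin_name="default_user_id",
    )
```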

View file

@ -14,10 +14,9 @@ model_list:
litellm_params: litellm_params:
model: openai/* model: openai/*
api_key: os.environ/OPENAI_API_KEY api_key: os.environ/OPENAI_API_KEY
- model_name: my-triton-model - model_name: mistral-embed
litellm_params: litellm_params:
model: triton/any" model: mistral/mistral-embed
api_base: https://exampleopenaiendpoint-production.up.railway.app/triton/embeddings
general_settings: general_settings:
master_key: sk-1234 master_key: sk-1234
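With the config swapped from the triton placeholder to `mistral-embed`, a hedged sketch of calling it through the proxy (assuming the proxy runs locally with the master key from this config):

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# "mistral-embed" is the model_name the proxy maps to mistral/mistral-embed.
embedding = client.embeddings.create(model="mistral-embed", input=["hello world"])
print(len(embedding.data[0].embedding))
```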

View file

@ -90,6 +90,7 @@ from litellm.types.llms.openai import (
HttpxBinaryResponseContent, HttpxBinaryResponseContent,
) )
from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
from litellm.proxy.management_helpers.utils import add_new_member
from litellm.proxy.utils import ( from litellm.proxy.utils import (
PrismaClient, PrismaClient,
DBClient, DBClient,
@ -102,7 +103,6 @@ from litellm.proxy.utils import (
hash_token, hash_token,
html_form, html_form,
missing_keys_html_form, missing_keys_html_form,
_read_request_body,
_is_valid_team_configs, _is_valid_team_configs,
_is_user_proxy_admin, _is_user_proxy_admin,
_get_user_role, _get_user_role,
@ -114,6 +114,8 @@ from litellm.proxy.utils import (
_to_ns, _to_ns,
get_error_message_str, get_error_message_str,
) )
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm import ( from litellm import (
CreateBatchRequest, CreateBatchRequest,
RetrieveBatchRequest, RetrieveBatchRequest,
@ -160,6 +162,10 @@ from litellm.proxy.auth.auth_checks import (
get_user_object, get_user_object,
allowed_routes_check, allowed_routes_check,
get_actual_routes, get_actual_routes,
log_to_opentelemetry,
)
from litellm.proxy.common_utils.management_endpoint_utils import (
management_endpoint_wrapper,
) )
from litellm.llms.custom_httpx.httpx_handler import HTTPHandler from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
from litellm.exceptions import RejectedRequestError from litellm.exceptions import RejectedRequestError
@ -368,6 +374,11 @@ from typing import Dict
api_key_header = APIKeyHeader( api_key_header = APIKeyHeader(
name="Authorization", auto_error=False, description="Bearer token" name="Authorization", auto_error=False, description="Bearer token"
) )
azure_api_key_header = APIKeyHeader(
name="API-Key",
auto_error=False,
description="Some older versions of the openai Python package will send an API-Key header with just the API key ",
)
user_api_base = None user_api_base = None
user_model = None user_model = None
user_debug = False user_debug = False
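A hedged sketch of authenticating with the alternate `API-Key` header that the new security scheme accepts (raw key, no `Bearer` prefix), assuming a local proxy with the `sk-1234` master key; the model name is a placeholder.

```python
import requests

resp = requests.post(
    "http://0.0.0.0:4000/chat/completions",
    headers={"API-Key": "sk-1234"},  # raw key, matching older openai client behaviour
    json={
        "model": "llama3",
        "messages": [{"role": "user", "content": "hi"}],
    },
)
print(resp.status_code)
```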
@ -508,18 +519,27 @@ async def check_request_disconnection(request: Request, llm_api_call_task):
async def user_api_key_auth( async def user_api_key_auth(
request: Request, api_key: str = fastapi.Security(api_key_header) request: Request,
api_key: str = fastapi.Security(api_key_header),
azure_api_key_header: str = fastapi.Security(azure_api_key_header),
) -> UserAPIKeyAuth: ) -> UserAPIKeyAuth:
global master_key, prisma_client, llm_model_list, user_custom_auth, custom_db_client, general_settings, proxy_logging_obj global master_key, prisma_client, llm_model_list, user_custom_auth, custom_db_client, general_settings, proxy_logging_obj
try: try:
if isinstance(api_key, str): if isinstance(api_key, str):
passed_in_key = api_key passed_in_key = api_key
api_key = _get_bearer_token(api_key=api_key) api_key = _get_bearer_token(api_key=api_key)
elif isinstance(azure_api_key_header, str):
api_key = azure_api_key_header
parent_otel_span: Optional[Span] = None parent_otel_span: Optional[Span] = None
if open_telemetry_logger is not None: if open_telemetry_logger is not None:
parent_otel_span = open_telemetry_logger.tracer.start_span( parent_otel_span = open_telemetry_logger.tracer.start_span(
name="Received Proxy Server Request", name="Received Proxy Server Request",
start_time=_to_ns(datetime.now()), start_time=_to_ns(datetime.now()),
context=open_telemetry_logger.get_traceparent_from_header(
headers=request.headers
),
) )
### USER-DEFINED AUTH FUNCTION ### ### USER-DEFINED AUTH FUNCTION ###
if user_custom_auth is not None: if user_custom_auth is not None:
@ -1062,8 +1082,9 @@ async def user_api_key_auth(
_user_id = _user.get("user_id", None) _user_id = _user.get("user_id", None)
if user_current_spend > user_max_budget: if user_current_spend > user_max_budget:
raise Exception( raise litellm.BudgetExceededError(
f"ExceededBudget: User {_user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}" current_cost=user_current_spend,
max_budget=user_max_budget,
) )
else: else:
# Token exists, not expired now check if its in budget for the user # Token exists, not expired now check if its in budget for the user
@ -1094,9 +1115,11 @@ async def user_api_key_auth(
) )
if user_current_spend > user_max_budget: if user_current_spend > user_max_budget:
raise Exception( raise litellm.BudgetExceededError(
f"ExceededBudget: User {valid_token.user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}" current_cost=user_current_spend,
max_budget=user_max_budget,
) )
# Check 3. Check if user is in their team budget # Check 3. Check if user is in their team budget
if valid_token.team_member_spend is not None: if valid_token.team_member_spend is not None:
if prisma_client is not None: if prisma_client is not None:
@ -1130,8 +1153,9 @@ async def user_api_key_auth(
) )
if team_member_budget is not None and team_member_budget > 0: if team_member_budget is not None and team_member_budget > 0:
if valid_token.team_member_spend > team_member_budget: if valid_token.team_member_spend > team_member_budget:
raise Exception( raise litellm.BudgetExceededError(
f"ExceededBudget: Crossed spend within team. UserID: {valid_token.user_id}, in team {valid_token.team_id} has exceeded their budget. Current spend: {valid_token.team_member_spend}; Max Budget: {team_member_budget}" current_cost=valid_token.team_member_spend,
max_budget=team_member_budget,
) )
# Check 3. If token is expired # Check 3. If token is expired
@ -1189,8 +1213,9 @@ async def user_api_key_auth(
#################################### ####################################
if valid_token.spend >= valid_token.max_budget: if valid_token.spend >= valid_token.max_budget:
raise Exception( raise litellm.BudgetExceededError(
f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}" current_cost=valid_token.spend,
max_budget=valid_token.max_budget,
) )
# Check 5. Token Model Spend is under Model budget # Check 5. Token Model Spend is under Model budget
@ -1226,8 +1251,9 @@ async def user_api_key_auth(
): ):
current_model_spend = model_spend[0]["_sum"]["spend"] current_model_spend = model_spend[0]["_sum"]["spend"]
current_model_budget = max_budget_per_model[current_model] current_model_budget = max_budget_per_model[current_model]
raise Exception( raise litellm.BudgetExceededError(
f"ExceededModelBudget: Current spend for model: {current_model_spend}; Max Budget for Model: {current_model_budget}" current_cost=current_model_spend,
max_budget=current_model_budget,
) )
# Check 6. Team spend is under Team budget # Check 6. Team spend is under Team budget
@ -1251,8 +1277,9 @@ async def user_api_key_auth(
) )
if valid_token.team_spend >= valid_token.team_max_budget: if valid_token.team_spend >= valid_token.team_max_budget:
raise Exception( raise litellm.BudgetExceededError(
f"ExceededTokenBudget: Current Team Spend: {valid_token.team_spend}; Max Budget for Team: {valid_token.team_max_budget}" current_cost=valid_token.team_spend,
max_budget=valid_token.team_max_budget,
) )
# Check 8: Additional Common Checks across jwt + key auth # Check 8: Additional Common Checks across jwt + key auth
@ -1495,7 +1522,7 @@ async def user_api_key_auth(
) )
if valid_token is None: if valid_token is None:
# No token was found when looking up in the DB # No token was found when looking up in the DB
raise Exception("Invalid token passed") raise Exception("Invalid proxy server token passed")
if valid_token_dict is not None: if valid_token_dict is not None:
if user_id_information is not None and _is_user_proxy_admin( if user_id_information is not None and _is_user_proxy_admin(
user_id_information user_id_information
@ -1528,6 +1555,14 @@ async def user_api_key_auth(
str(e) str(e)
) )
) )
# Log this exception to OTEL
if open_telemetry_logger is not None:
await open_telemetry_logger.async_post_call_failure_hook(
original_exception=e,
user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span),
)
verbose_proxy_logger.debug(traceback.format_exc()) verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, litellm.BudgetExceededError): if isinstance(e, litellm.BudgetExceededError):
raise ProxyException( raise ProxyException(
@ -7803,6 +7838,10 @@ async def get_global_spend_report(
default=None, default=None,
description="Time till which to view spend", description="Time till which to view spend",
), ),
group_by: Optional[Literal["team", "customer"]] = fastapi.Query(
default="team",
description="Group spend by internal team or customer",
),
): ):
""" """
Get Daily Spend per Team, based on specific startTime and endTime. Per team, view usage by each key, model Get Daily Spend per Team, based on specific startTime and endTime. Per team, view usage by each key, model
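A hedged sketch of requesting the report grouped by customer rather than team; the route path and query parameter names are assumptions based on this endpoint, and a local proxy with the `sk-1234` master key is assumed.

```python
import requests

resp = requests.get(
    "http://0.0.0.0:4000/global/spend/report",
    headers={"Authorization": "Bearer sk-1234"},
    params={
        "start_date": "2024-06-01",
        "end_date": "2024-06-30",
        "group_by": "customer",  # default is "team"
    },
)
print(resp.json())
```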
@ -7849,69 +7888,130 @@ async def get_global_spend_report(
f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
) )
# first get data from spend logs -> SpendByModelApiKey if group_by == "team":
# then read data from "SpendByModelApiKey" to format the response obj # first get data from spend logs -> SpendByModelApiKey
sql_query = """ # then read data from "SpendByModelApiKey" to format the response obj
sql_query = """
WITH SpendByModelApiKey AS ( WITH SpendByModelApiKey AS (
SELECT SELECT
date_trunc('day', sl."startTime") AS group_by_day, date_trunc('day', sl."startTime") AS group_by_day,
COALESCE(tt.team_alias, 'Unassigned Team') AS team_name, COALESCE(tt.team_alias, 'Unassigned Team') AS team_name,
sl.model, sl.model,
sl.api_key, sl.api_key,
SUM(sl.spend) AS model_api_spend, SUM(sl.spend) AS model_api_spend,
SUM(sl.total_tokens) AS model_api_tokens SUM(sl.total_tokens) AS model_api_tokens
FROM FROM
"LiteLLM_SpendLogs" sl "LiteLLM_SpendLogs" sl
LEFT JOIN LEFT JOIN
"LiteLLM_TeamTable" tt "LiteLLM_TeamTable" tt
ON ON
sl.team_id = tt.team_id sl.team_id = tt.team_id
WHERE WHERE
sl."startTime" BETWEEN $1::date AND $2::date sl."startTime" BETWEEN $1::date AND $2::date
GROUP BY GROUP BY
date_trunc('day', sl."startTime"), date_trunc('day', sl."startTime"),
tt.team_alias, tt.team_alias,
sl.model, sl.model,
sl.api_key sl.api_key
) )
SELECT
group_by_day,
jsonb_agg(jsonb_build_object(
'team_name', team_name,
'total_spend', total_spend,
'metadata', metadata
)) AS teams
FROM (
SELECT
group_by_day,
team_name,
SUM(model_api_spend) AS total_spend,
jsonb_agg(jsonb_build_object(
'model', model,
'api_key', api_key,
'spend', model_api_spend,
'total_tokens', model_api_tokens
)) AS metadata
FROM
SpendByModelApiKey
GROUP BY
group_by_day,
team_name
) AS aggregated
GROUP BY
group_by_day
ORDER BY
group_by_day;
"""
db_response = await prisma_client.db.query_raw(
sql_query, start_date_obj, end_date_obj
)
if db_response is None:
return []
return db_response
elif group_by == "customer":
sql_query = """
WITH SpendByModelApiKey AS (
SELECT
date_trunc('day', sl."startTime") AS group_by_day,
sl.end_user AS customer,
sl.model,
sl.api_key,
SUM(sl.spend) AS model_api_spend,
SUM(sl.total_tokens) AS model_api_tokens
FROM
"LiteLLM_SpendLogs" sl
WHERE
sl."startTime" BETWEEN $1::date AND $2::date
GROUP BY
date_trunc('day', sl."startTime"),
customer,
sl.model,
sl.api_key
)
SELECT SELECT
group_by_day, group_by_day,
jsonb_agg(jsonb_build_object( jsonb_agg(jsonb_build_object(
'team_name', team_name, 'customer', customer,
'total_spend', total_spend, 'total_spend', total_spend,
'metadata', metadata 'metadata', metadata
)) AS teams )) AS customers
FROM ( FROM
SELECT (
group_by_day, SELECT
team_name, group_by_day,
SUM(model_api_spend) AS total_spend, customer,
jsonb_agg(jsonb_build_object( SUM(model_api_spend) AS total_spend,
'model', model, jsonb_agg(jsonb_build_object(
'api_key', api_key, 'model', model,
'spend', model_api_spend, 'api_key', api_key,
'total_tokens', model_api_tokens 'spend', model_api_spend,
)) AS metadata 'total_tokens', model_api_tokens
FROM )) AS metadata
SpendByModelApiKey FROM
GROUP BY SpendByModelApiKey
group_by_day, GROUP BY
team_name group_by_day,
) AS aggregated customer
) AS aggregated
GROUP BY GROUP BY
group_by_day group_by_day
ORDER BY ORDER BY
group_by_day; group_by_day;
""" """
db_response = await prisma_client.db.query_raw( db_response = await prisma_client.db.query_raw(
sql_query, start_date_obj, end_date_obj sql_query, start_date_obj, end_date_obj
) )
if db_response is None: if db_response is None:
return [] return []
return db_response return db_response
except Exception as e: except Exception as e:
raise HTTPException( raise HTTPException(
@ -8097,7 +8197,9 @@ async def _get_spend_report_for_time_range(
return response, spend_per_tag return response, spend_per_tag
except Exception as e: except Exception as e:
verbose_proxy_logger.error("Exception in _get_daily_spend_reports", e) # noqa verbose_proxy_logger.error(
"Exception in _get_daily_spend_reports {}".format(str(e))
) # noqa
@router.post( @router.post(
@ -8755,7 +8857,7 @@ async def new_user(data: NewUserRequest):
- organization_id: Optional[str] - specify the org a user belongs to. - organization_id: Optional[str] - specify the org a user belongs to.
- user_email: Optional[str] - Specify a user email. - user_email: Optional[str] - Specify a user email.
- send_invite_email: Optional[bool] - Specify if an invite email should be sent. - send_invite_email: Optional[bool] - Specify if an invite email should be sent.
- user_role: Optional[str] - Specify a user role - "admin", "app_owner", "app_user" - user_role: Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/litellm/proxy/_types.py#L20`
- max_budget: Optional[float] - Specify max budget for a given user. - max_budget: Optional[float] - Specify max budget for a given user.
- models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models) - models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
- tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute) - tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute)
@ -8790,7 +8892,10 @@ async def new_user(data: NewUserRequest):
role="user", role="user",
user_email=data_json.get("user_email", None), user_email=data_json.get("user_email", None),
), ),
) ),
http_request=Request(
scope={"type": "http"},
),
) )
if data.send_invite_email is True: if data.send_invite_email is True:
@ -9823,8 +9928,10 @@ async def delete_end_user(
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
response_model=LiteLLM_TeamTable, response_model=LiteLLM_TeamTable,
) )
@management_endpoint_wrapper
async def new_team( async def new_team(
data: NewTeamRequest, data: NewTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header( litellm_changed_by: Optional[str] = Header(
None, None,
@ -10058,6 +10165,7 @@ async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
@router.post( @router.post(
"/team/update", tags=["team management"], dependencies=[Depends(user_api_key_auth)] "/team/update", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
) )
@management_endpoint_wrapper
async def update_team( async def update_team(
data: UpdateTeamRequest, data: UpdateTeamRequest,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
@ -10163,8 +10271,10 @@ async def update_team(
tags=["team management"], tags=["team management"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
) )
@management_endpoint_wrapper
async def team_member_add( async def team_member_add(
data: TeamMemberAddRequest, data: TeamMemberAddRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
): ):
""" """
@ -10190,10 +10300,12 @@ async def team_member_add(
raise HTTPException(status_code=400, detail={"error": "No team id passed in"}) raise HTTPException(status_code=400, detail={"error": "No team id passed in"})
if data.member is None: if data.member is None:
raise HTTPException(status_code=400, detail={"error": "No member passed in"}) raise HTTPException(
status_code=400, detail={"error": "No member/members passed in"}
)
existing_team_row = await prisma_client.get_data( # type: ignore existing_team_row = await prisma_client.db.litellm_teamtable.find_unique(
team_id=data.team_id, table_name="team", query_type="find_unique" where={"team_id": data.team_id}
) )
if existing_team_row is None: if existing_team_row is None:
raise HTTPException( raise HTTPException(
@ -10203,75 +10315,50 @@ async def team_member_add(
}, },
) )
new_member = data.member complete_team_data = LiteLLM_TeamTable(**existing_team_row.model_dump())
existing_team_row.members_with_roles.append(new_member) if isinstance(data.member, Member):
# add to team db
new_member = data.member
complete_team_data = LiteLLM_TeamTable( complete_team_data.members_with_roles.append(new_member)
**_get_pydantic_json_dict(existing_team_row),
elif isinstance(data.member, List):
# add to team db
new_members = data.member
complete_team_data.members_with_roles.extend(new_members)
# ADD MEMBER TO TEAM
_db_team_members = [m.model_dump() for m in complete_team_data.members_with_roles]
updated_team = await prisma_client.db.litellm_teamtable.update(
where={"team_id": data.team_id},
data={"members_with_roles": json.dumps(_db_team_members)}, # type: ignore
) )
team_row = await prisma_client.update_data( if isinstance(data.member, Member):
update_key_values=complete_team_data.json(exclude_none=True), await add_new_member(
data=complete_team_data.json(exclude_none=True), new_member=data.member,
table_name="team", max_budget_in_team=data.max_budget_in_team,
team_id=data.team_id, prisma_client=prisma_client,
) user_api_key_dict=user_api_key_dict,
litellm_proxy_admin_name=litellm_proxy_admin_name,
## ADD USER, IF NEW ## team_id=data.team_id,
user_data = { # type: ignore
"teams": [team_row["team_id"]],
"models": team_row["data"].models,
}
if new_member.user_id is not None:
user_data["user_id"] = new_member.user_id # type: ignore
await prisma_client.update_data(
user_id=new_member.user_id,
data=user_data,
update_key_values_custom_query={
"teams": {
"push": [team_row["team_id"]],
}
},
table_name="user",
) )
elif new_member.user_email is not None: elif isinstance(data.member, List):
user_data["user_id"] = str(uuid.uuid4()) tasks: List = []
user_data["user_email"] = new_member.user_email for m in data.member:
## user email is not unique acc. to prisma schema -> future improvement await add_new_member(
### for now: check if it exists in db, if not - insert it new_member=m,
existing_user_row = await prisma_client.get_data( max_budget_in_team=data.max_budget_in_team,
key_val={"user_email": new_member.user_email}, prisma_client=prisma_client,
table_name="user", user_api_key_dict=user_api_key_dict,
query_type="find_all", litellm_proxy_admin_name=litellm_proxy_admin_name,
) team_id=data.team_id,
if existing_user_row is None or ( )
isinstance(existing_user_row, list) and len(existing_user_row) == 0 await asyncio.gather(*tasks)
):
await prisma_client.insert_data(data=user_data, table_name="user") return updated_team
# Check if trying to set a budget for team member
if data.max_budget_in_team is not None and new_member.user_id is not None:
# create a new budget item for this member
response = await prisma_client.db.litellm_budgettable.create(
data={
"max_budget": data.max_budget_in_team,
"created_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
"updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
}
)
_budget_id = response.budget_id
await prisma_client.db.litellm_teammembership.create(
data={
"team_id": data.team_id,
"user_id": new_member.user_id,
"budget_id": _budget_id,
}
)
return team_row
@router.post( @router.post(
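For reference, a hedged sketch of exercising the updated route with a list of members over HTTP; the route path, the member field names, and a local proxy with the `sk-1234` master key are assumptions.

```python
import requests

resp = requests.post(
    "http://0.0.0.0:4000/team/member_add",
    headers={"Authorization": "Bearer sk-1234"},
    json={
        "team_id": "team-1",
        "max_budget_in_team": 10.0,
        # "member" may now be a single object or a list of objects.
        "member": [
            {"role": "user", "user_email": "alice@example.com"},
            {"role": "user", "user_id": "user-123"},
        ],
    },
)
print(resp.status_code, resp.json())
```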
@ -10279,8 +10366,10 @@ async def team_member_add(
tags=["team management"], tags=["team management"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
) )
@management_endpoint_wrapper
async def team_member_delete( async def team_member_delete(
data: TeamMemberDeleteRequest, data: TeamMemberDeleteRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
): ):
""" """
@ -10384,8 +10473,10 @@ async def team_member_delete(
@router.post( @router.post(
"/team/delete", tags=["team management"], dependencies=[Depends(user_api_key_auth)] "/team/delete", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
) )
@management_endpoint_wrapper
async def delete_team( async def delete_team(
data: DeleteTeamRequest, data: DeleteTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header( litellm_changed_by: Optional[str] = Header(
None, None,
@ -10469,10 +10560,12 @@ async def delete_team(
@router.get( @router.get(
"/team/info", tags=["team management"], dependencies=[Depends(user_api_key_auth)] "/team/info", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
) )
@management_endpoint_wrapper
async def team_info( async def team_info(
http_request: Request,
team_id: str = fastapi.Query( team_id: str = fastapi.Query(
default=None, description="Team ID in the request parameters" default=None, description="Team ID in the request parameters"
) ),
): ):
""" """
get info on team + related keys get info on team + related keys
@ -10556,8 +10649,10 @@ async def team_info(
@router.post( @router.post(
"/team/block", tags=["team management"], dependencies=[Depends(user_api_key_auth)] "/team/block", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
) )
@management_endpoint_wrapper
async def block_team( async def block_team(
data: BlockTeamRequest, data: BlockTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
): ):
""" """
@ -10578,8 +10673,10 @@ async def block_team(
@router.post( @router.post(
"/team/unblock", tags=["team management"], dependencies=[Depends(user_api_key_auth)] "/team/unblock", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
) )
@management_endpoint_wrapper
async def unblock_team( async def unblock_team(
data: BlockTeamRequest, data: BlockTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
): ):
""" """
@ -10600,7 +10697,9 @@ async def unblock_team(
@router.get( @router.get(
"/team/list", tags=["team management"], dependencies=[Depends(user_api_key_auth)] "/team/list", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
) )
@management_endpoint_wrapper
async def list_team( async def list_team(
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
): ):
""" """
@ -13007,7 +13106,9 @@ async def auth_callback(request: Request):
user_role = getattr(result, generic_user_role_attribute_name, None) user_role = getattr(result, generic_user_role_attribute_name, None)
if user_id is None: if user_id is None:
user_id = getattr(result, "first_name", "") + getattr(result, "last_name", "") _first_name = getattr(result, "first_name", "") or ""
_last_name = getattr(result, "last_name", "") or ""
user_id = _first_name + _last_name
user_info = None user_info = None
user_id_models: List = [] user_id_models: List = []

View file

@ -91,7 +91,7 @@ model LiteLLM_TeamTable {
updated_at DateTime @default(now()) @updatedAt @map("updated_at") updated_at DateTime @default(now()) @updatedAt @map("updated_at")
model_spend Json @default("{}") model_spend Json @default("{}")
model_max_budget Json @default("{}") model_max_budget Json @default("{}")
model_id Int? @unique model_id Int? @unique // id for LiteLLM_ModelTable -> stores team-level model aliases
litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id]) litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
litellm_model_table LiteLLM_ModelTable? @relation(fields: [model_id], references: [id]) litellm_model_table LiteLLM_ModelTable? @relation(fields: [model_id], references: [id])
} }

View file

@ -0,0 +1,41 @@
# mypy: ignore-errors
import openai
from opentelemetry import trace
from opentelemetry.context import Context
from opentelemetry.trace import SpanKind
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
trace.set_tracer_provider(TracerProvider())
memory_exporter = InMemorySpanExporter()
span_processor = SimpleSpanProcessor(memory_exporter)
trace.get_tracer_provider().add_span_processor(span_processor)
tracer = trace.get_tracer(__name__)
# create an otel traceparent header
tracer = trace.get_tracer(__name__)
with tracer.start_as_current_span("ishaan-local-dev-app") as span:
span.set_attribute("generation_name", "ishaan-generation-openai-client")
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
extra_headers = {}
context = trace.set_span_in_context(span)
traceparent = TraceContextTextMapPropagator()
traceparent.inject(carrier=extra_headers, context=context)
print("EXTRA HEADERS: ", extra_headers)
_trace_parent = extra_headers.get("traceparent")
trace_id = _trace_parent.split("-")[1]
print("Trace ID: ", trace_id)
# # request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
model="llama3",
messages=[
{"role": "user", "content": "this is a test request, write a short poem"}
],
extra_headers=extra_headers,
)
print(response)

View file

@ -0,0 +1,21 @@
# mypy: ignore-errors
import openai
import uuid
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
example_traceparent = f"00-80e1afed08e019fc1110464cfa66635c-02e80198930058d4-01"
extra_headers = {"traceparent": example_traceparent}
_trace_id = example_traceparent.split("-")[1]
print("EXTRA HEADERS: ", extra_headers)
print("Trace ID: ", _trace_id)
response = client.chat.completions.create(
model="llama3",
messages=[
{"role": "user", "content": "this is a test request, write a short poem"}
],
extra_headers=extra_headers,
)
print(response)

View file

@ -48,6 +48,7 @@ from datetime import datetime, timedelta
from litellm.integrations.slack_alerting import SlackAlerting from litellm.integrations.slack_alerting import SlackAlerting
from typing_extensions import overload from typing_extensions import overload
from functools import wraps from functools import wraps
from fastapi import Request
if TYPE_CHECKING: if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span from opentelemetry.trace import Span as _Span
@ -2017,6 +2018,7 @@ def get_logging_payload(
user_api_key_team_id=None, user_api_key_team_id=None,
user_api_key_user_id=None, user_api_key_user_id=None,
user_api_key_team_alias=None, user_api_key_team_alias=None,
spend_logs_metadata=None,
) )
if isinstance(metadata, dict): if isinstance(metadata, dict):
verbose_proxy_logger.debug( verbose_proxy_logger.debug(
@ -2595,36 +2597,6 @@ async def update_spend(
raise e raise e
async def _read_request_body(request):
"""
Asynchronous function to read the request body and parse it as JSON or literal data.
Parameters:
- request: The request object to read the body from
Returns:
- dict: Parsed request data as a dictionary
"""
import ast, json
try:
request_data = {}
if request is None:
return request_data
body = await request.body()
if body == b"" or body is None:
return request_data
body_str = body.decode()
try:
request_data = ast.literal_eval(body_str)
except:
request_data = json.loads(body_str)
return request_data
except:
return {}
def _is_projected_spend_over_limit( def _is_projected_spend_over_limit(
current_spend: float, soft_budget_limit: Optional[float] current_spend: float, soft_budget_limit: Optional[float]
): ):

View file

@ -2057,11 +2057,14 @@ class Router:
generic_fallback_idx: Optional[int] = None generic_fallback_idx: Optional[int] = None
## check for specific model group-specific fallbacks ## check for specific model group-specific fallbacks
for idx, item in enumerate(fallbacks): for idx, item in enumerate(fallbacks):
if list(item.keys())[0] == model_group: if isinstance(item, dict):
fallback_model_group = item[model_group] if list(item.keys())[0] == model_group:
break fallback_model_group = item[model_group]
elif list(item.keys())[0] == "*": break
generic_fallback_idx = idx elif list(item.keys())[0] == "*":
generic_fallback_idx = idx
elif isinstance(item, str):
fallback_model_group = [fallbacks.pop(idx)]
## if none, check for generic fallback ## if none, check for generic fallback
if ( if (
fallback_model_group is None fallback_model_group is None
@ -2310,13 +2313,15 @@ class Router:
verbose_router_logger.debug(f"inside model fallbacks: {fallbacks}") verbose_router_logger.debug(f"inside model fallbacks: {fallbacks}")
fallback_model_group = None fallback_model_group = None
generic_fallback_idx: Optional[int] = None generic_fallback_idx: Optional[int] = None
## check for specific model group-specific fallbacks
for idx, item in enumerate(fallbacks): for idx, item in enumerate(fallbacks):
if list(item.keys())[0] == model_group: if isinstance(item, dict):
fallback_model_group = item[model_group] if list(item.keys())[0] == model_group:
break fallback_model_group = item[model_group]
elif list(item.keys())[0] == "*": break
generic_fallback_idx = idx elif list(item.keys())[0] == "*":
generic_fallback_idx = idx
elif isinstance(item, str):
fallback_model_group = [fallbacks.pop(idx)]
## if none, check for generic fallback ## if none, check for generic fallback
if ( if (
fallback_model_group is None fallback_model_group is None
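The fallback parsing now tolerates plain string entries alongside the `{"model-group": [...]}` form. A hedged sketch of both shapes with placeholder deployments:

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "openai/gpt-3.5-turbo"},
        },
        {
            "model_name": "my-fallback-model",
            "litellm_params": {"model": "openai/gpt-4o"},
        },
    ],
    # A dict maps a model group to its fallbacks; a bare string entry is now
    # treated as the fallback group itself.
    fallbacks=[{"gpt-3.5-turbo": ["my-fallback-model"]}, "my-fallback-model"],
)
```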

View file

@ -810,6 +810,28 @@ def test_vertexai_embedding():
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_vertexai_embedding_embedding_latest():
try:
load_vertex_ai_credentials()
litellm.set_verbose = True
response = embedding(
model="vertex_ai/text-embedding-004",
input=["hi"],
dimensions=1,
auto_truncate=True,
task_type="RETRIEVAL_QUERY",
)
assert len(response.data[0]["embedding"]) == 1
assert response.usage.prompt_tokens > 0
print(f"response:", response)
except litellm.RateLimitError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_vertexai_aembedding(): async def test_vertexai_aembedding():
try: try:

View file

@ -220,13 +220,13 @@ def test_completion_bedrock_claude_sts_oidc_auth():
aws_web_identity_token = "oidc/circleci_v2/" aws_web_identity_token = "oidc/circleci_v2/"
aws_region_name = os.environ["AWS_REGION_NAME"] aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"] # aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
# TODO: This is using David's IAM role, we should use Litellm's IAM role eventually # TODO: This is using ai.moda's IAM role, we should use LiteLLM's IAM role eventually
aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci" aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci"
try: try:
litellm.set_verbose = True litellm.set_verbose = True
response = completion( response_1 = completion(
model="bedrock/anthropic.claude-3-haiku-20240307-v1:0", model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
messages=messages, messages=messages,
max_tokens=10, max_tokens=10,
@ -236,8 +236,40 @@ def test_completion_bedrock_claude_sts_oidc_auth():
aws_role_name=aws_role_name, aws_role_name=aws_role_name,
aws_session_name="my-test-session", aws_session_name="my-test-session",
) )
# Add any assertions here to check the response print(response_1)
print(response) assert len(response_1.choices) > 0
assert len(response_1.choices[0].message.content) > 0
# This second call is to verify that the cache isn't breaking anything
response_2 = completion(
model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
messages=messages,
max_tokens=5,
temperature=0.2,
aws_region_name=aws_region_name,
aws_web_identity_token=aws_web_identity_token,
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
)
print(response_2)
assert len(response_2.choices) > 0
assert len(response_2.choices[0].message.content) > 0
# This third call is to verify that the cache isn't used for a different region
response_3 = completion(
model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
messages=messages,
max_tokens=6,
temperature=0.3,
aws_region_name="us-east-1",
aws_web_identity_token=aws_web_identity_token,
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
)
print(response_3)
assert len(response_3.choices) > 0
assert len(response_3.choices[0].message.content) > 0
except RateLimitError: except RateLimitError:
pass pass
except Exception as e: except Exception as e:
@ -255,7 +287,7 @@ def test_completion_bedrock_httpx_command_r_sts_oidc_auth():
aws_web_identity_token = "oidc/circleci_v2/" aws_web_identity_token = "oidc/circleci_v2/"
aws_region_name = os.environ["AWS_REGION_NAME"] aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"] # aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
# TODO: This is using David's IAM role, we should use Litellm's IAM role eventually # TODO: This is using ai.moda's IAM role, we should use LiteLLM's IAM role eventually
aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci" aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci"
try: try:

View file

@ -16,7 +16,7 @@ from litellm.llms.prompt_templates.factory import anthropic_messages_pt
from unittest.mock import patch, MagicMock from unittest.mock import patch, MagicMock
from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
# litellm.num_retries=3 # litellm.num_retries = 3
litellm.cache = None litellm.cache = None
litellm.success_callback = [] litellm.success_callback = []
user_message = "Write a short poem about the sky" user_message = "Write a short poem about the sky"
@ -114,6 +114,27 @@ def test_null_role_response():
assert response.choices[0].message.role == "assistant" assert response.choices[0].message.role == "assistant"
def test_completion_azure_ai_command_r():
try:
import os
litellm.set_verbose = True
os.environ["AZURE_AI_API_BASE"] = os.getenv("AZURE_COHERE_API_BASE", "")
os.environ["AZURE_AI_API_KEY"] = os.getenv("AZURE_COHERE_API_KEY", "")
response: litellm.ModelResponse = completion(
model="azure_ai/command-r-plus",
messages=[{"role": "user", "content": "What is the meaning of life?"}],
) # type: ignore
assert "azure_ai" in response.model
except litellm.Timeout as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_azure_command_r(): def test_completion_azure_command_r():
try: try:
litellm.set_verbose = True litellm.set_verbose = True
@ -530,6 +551,7 @@ def test_completion_cohere_command_r_plus_function_call():
messages=messages, messages=messages,
tools=tools, tools=tools,
tool_choice="auto", tool_choice="auto",
force_single_step=True,
) )
print(second_response) print(second_response)
except Exception as e: except Exception as e:
@ -720,7 +742,11 @@ def test_completion_claude_3_function_plus_image():
print(response) print(response)
def test_completion_azure_mistral_large_function_calling(): @pytest.mark.parametrize(
"provider",
["azure", "azure_ai"],
)
def test_completion_azure_mistral_large_function_calling(provider):
""" """
This primarily tests if the 'Function()' pydantic object correctly handles argument param passed in as a dict vs. string This primarily tests if the 'Function()' pydantic object correctly handles argument param passed in as a dict vs. string
""" """
@ -751,8 +777,9 @@ def test_completion_azure_mistral_large_function_calling():
"content": "What's the weather like in Boston today in Fahrenheit?", "content": "What's the weather like in Boston today in Fahrenheit?",
} }
] ]
response = completion( response = completion(
model="azure/mistral-large-latest", model="{}/mistral-large-latest".format(provider),
api_base=os.getenv("AZURE_MISTRAL_API_BASE"), api_base=os.getenv("AZURE_MISTRAL_API_BASE"),
api_key=os.getenv("AZURE_MISTRAL_API_KEY"), api_key=os.getenv("AZURE_MISTRAL_API_KEY"),
messages=messages, messages=messages,

View file

@ -34,14 +34,15 @@ class MyCustomHandler(CustomLogger):
self.response_cost = 0 self.response_cost = 0
def log_pre_api_call(self, model, messages, kwargs): def log_pre_api_call(self, model, messages, kwargs):
print(f"Pre-API Call") print("Pre-API Call")
traceback.print_stack()
self.data_sent_to_api = kwargs["additional_args"].get("complete_input_dict", {}) self.data_sent_to_api = kwargs["additional_args"].get("complete_input_dict", {})
def log_post_api_call(self, kwargs, response_obj, start_time, end_time): def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
print(f"Post-API Call") print("Post-API Call")
def log_stream_event(self, kwargs, response_obj, start_time, end_time): def log_stream_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Stream") print("On Stream")
def log_success_event(self, kwargs, response_obj, start_time, end_time): def log_success_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Success") print(f"On Success")
@ -372,6 +373,7 @@ async def test_async_custom_handler_embedding_optional_param():
Tests if the openai optional params for embedding - user + encoding_format, Tests if the openai optional params for embedding - user + encoding_format,
are logged are logged
""" """
litellm.set_verbose = True
customHandler_optional_params = MyCustomHandler() customHandler_optional_params = MyCustomHandler()
litellm.callbacks = [customHandler_optional_params] litellm.callbacks = [customHandler_optional_params]
response = await litellm.aembedding( response = await litellm.aembedding(

View file

@ -55,8 +55,12 @@ async def test_content_policy_exception_azure():
except litellm.ContentPolicyViolationError as e: except litellm.ContentPolicyViolationError as e:
print("caught a content policy violation error! Passed") print("caught a content policy violation error! Passed")
print("exception", e) print("exception", e)
assert e.litellm_debug_info is not None
assert isinstance(e.litellm_debug_info, str)
assert len(e.litellm_debug_info) > 0
pass pass
except Exception as e: except Exception as e:
print()
pytest.fail(f"An exception occurred - {str(e)}") pytest.fail(f"An exception occurred - {str(e)}")

View file

@ -195,6 +195,8 @@ async def test_aimage_generation_vertex_ai():
assert isinstance(d, litellm.ImageObject) assert isinstance(d, litellm.ImageObject)
print("data in response.data", d) print("data in response.data", d)
assert d.b64_json is not None assert d.b64_json is not None
except litellm.ServiceUnavailableError as e:
pass
except litellm.RateLimitError as e: except litellm.RateLimitError as e:
pass pass
except litellm.ContentPolicyViolationError: except litellm.ContentPolicyViolationError:

View file

@ -16,6 +16,7 @@ from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLMRoutes
from litellm.proxy.auth.handle_jwt import JWTHandler from litellm.proxy.auth.handle_jwt import JWTHandler
from litellm.caching import DualCache from litellm.caching import DualCache
from datetime import datetime, timedelta from datetime import datetime, timedelta
from fastapi import Request
public_key = { public_key = {
"kty": "RSA", "kty": "RSA",
@ -346,6 +347,7 @@ async def test_team_token_output(prisma_client, audience):
models=["gpt-3.5-turbo", "gpt-4"], models=["gpt-3.5-turbo", "gpt-4"],
), ),
user_api_key_dict=result, user_api_key_dict=result,
http_request=Request(scope={"type": "http"}),
) )
except Exception as e: except Exception as e:
pytest.fail(f"This should not fail - {str(e)}") pytest.fail(f"This should not fail - {str(e)}")
@ -534,6 +536,7 @@ async def test_user_token_output(
models=["gpt-3.5-turbo", "gpt-4"], models=["gpt-3.5-turbo", "gpt-4"],
), ),
user_api_key_dict=result, user_api_key_dict=result,
http_request=Request(scope={"type": "http"}),
) )
if default_team_id: if default_team_id:
await new_team( await new_team(
@ -544,6 +547,7 @@ async def test_user_token_output(
models=["gpt-3.5-turbo", "gpt-4"], models=["gpt-3.5-turbo", "gpt-4"],
), ),
user_api_key_dict=result, user_api_key_dict=result,
http_request=Request(scope={"type": "http"}),
) )
except Exception as e: except Exception as e:
pytest.fail(f"This should not fail - {str(e)}") pytest.fail(f"This should not fail - {str(e)}")

View file

@ -137,6 +137,7 @@ async def test_new_user_response(prisma_client):
NewTeamRequest( NewTeamRequest(
team_id=_team_id, team_id=_team_id,
), ),
http_request=Request(scope={"type": "http"}),
user_api_key_dict=UserAPIKeyAuth( user_api_key_dict=UserAPIKeyAuth(
user_role=LitellmUserRoles.PROXY_ADMIN, user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234", api_key="sk-1234",
@ -272,7 +273,7 @@ def test_call_with_invalid_key(prisma_client):
except Exception as e: except Exception as e:
print("Got Exception", e) print("Got Exception", e)
print(e.message) print(e.message)
assert "Authentication Error, Invalid token passed" in e.message assert "Authentication Error, Invalid proxy server token passed" in e.message
pass pass
@ -368,6 +369,7 @@ async def test_call_with_valid_model_using_all_models(prisma_client):
new_team_response = await new_team( new_team_response = await new_team(
data=team_request, data=team_request,
user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN), user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
http_request=Request(scope={"type": "http"}),
) )
print("new_team_response", new_team_response) print("new_team_response", new_team_response)
created_team_id = new_team_response["team_id"] created_team_id = new_team_response["team_id"]
@ -471,7 +473,7 @@ def test_call_with_user_over_budget(prisma_client):
asyncio.run(test()) asyncio.run(test())
except Exception as e: except Exception as e:
error_detail = e.message error_detail = e.message
assert "Authentication Error, ExceededBudget:" in error_detail assert "Budget has been exceeded" in error_detail
print(vars(e)) print(vars(e))
@ -652,7 +654,7 @@ def test_call_with_proxy_over_budget(prisma_client):
error_detail = e.message error_detail = e.message
else: else:
error_detail = traceback.format_exc() error_detail = traceback.format_exc()
assert "Authentication Error, ExceededBudget:" in error_detail assert "Budget has been exceeded" in error_detail
print(vars(e)) print(vars(e))
@ -730,7 +732,7 @@ def test_call_with_user_over_budget_stream(prisma_client):
asyncio.run(test()) asyncio.run(test())
except Exception as e: except Exception as e:
error_detail = e.message error_detail = e.message
assert "Authentication Error, ExceededBudget:" in error_detail assert "Budget has been exceeded" in error_detail
print(vars(e)) print(vars(e))
@ -827,7 +829,7 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
asyncio.run(test()) asyncio.run(test())
except Exception as e: except Exception as e:
error_detail = e.message error_detail = e.message
assert "Authentication Error, ExceededBudget:" in error_detail assert "Budget has been exceeded" in error_detail
print(vars(e)) print(vars(e))
@ -1086,6 +1088,7 @@ def test_generate_and_update_key(prisma_client):
api_key="sk-1234", api_key="sk-1234",
user_id="1234", user_id="1234",
), ),
http_request=Request(scope={"type": "http"}),
) )
_team_2 = "ishaan-special-team_{}".format(uuid.uuid4()) _team_2 = "ishaan-special-team_{}".format(uuid.uuid4())
@ -1098,6 +1101,7 @@ def test_generate_and_update_key(prisma_client):
api_key="sk-1234", api_key="sk-1234",
user_id="1234", user_id="1234",
), ),
http_request=Request(scope={"type": "http"}),
) )
request = NewUserRequest( request = NewUserRequest(
@ -1175,7 +1179,6 @@ def test_generate_and_update_key(prisma_client):
asyncio.run(test()) asyncio.run(test())
except Exception as e: except Exception as e:
print("Got Exception", e) print("Got Exception", e)
print(e.message)
pytest.fail(f"An exception occurred - {str(e)}") pytest.fail(f"An exception occurred - {str(e)}")
@ -1363,7 +1366,7 @@ def test_call_with_key_over_budget(prisma_client):
error_detail = e.message error_detail = e.message
else: else:
error_detail = str(e) error_detail = str(e)
assert "Authentication Error, ExceededTokenBudget:" in error_detail assert "Budget has been exceeded" in error_detail
print(vars(e)) print(vars(e))
@ -1477,7 +1480,7 @@ def test_call_with_key_over_model_budget(prisma_client):
# print(f"Error - {str(e)}") # print(f"Error - {str(e)}")
traceback.print_exc() traceback.print_exc()
error_detail = e.message error_detail = e.message
assert "Authentication Error, ExceededModelBudget:" in error_detail assert "Budget has been exceeded!" in error_detail
print(vars(e)) print(vars(e))
@ -1638,7 +1641,7 @@ async def test_call_with_key_over_budget_stream(prisma_client):
except Exception as e: except Exception as e:
print("Got Exception", e) print("Got Exception", e)
error_detail = e.message error_detail = e.message
assert "Authentication Error, ExceededTokenBudget:" in error_detail assert "Budget has been exceeded" in error_detail
print(vars(e)) print(vars(e))
@ -2051,6 +2054,7 @@ async def test_master_key_hashing(prisma_client):
api_key="sk-1234", api_key="sk-1234",
user_id="1234", user_id="1234",
), ),
http_request=Request(scope={"type": "http"}),
) )
_response = await new_user( _response = await new_user(
@ -2184,6 +2188,7 @@ async def test_create_update_team(prisma_client):
tpm_limit=20, tpm_limit=20,
rpm_limit=20, rpm_limit=20,
), ),
http_request=Request(scope={"type": "http"}),
user_api_key_dict=UserAPIKeyAuth( user_api_key_dict=UserAPIKeyAuth(
user_role=LitellmUserRoles.PROXY_ADMIN, user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234", api_key="sk-1234",
@ -2233,7 +2238,10 @@ async def test_create_update_team(prisma_client):
) )
# now hit team_info # now hit team_info
response = await team_info(team_id=_team_id) response = await team_info(
team_id=_team_id,
http_request=Request(scope={"type": "http"}),
)
print("RESPONSE from team_info", response) print("RESPONSE from team_info", response)

View file

@ -1059,3 +1059,53 @@ async def test_default_model_fallbacks(sync_mode, litellm_module_fallbacks):
assert isinstance(response, litellm.ModelResponse) assert isinstance(response, litellm.ModelResponse)
assert response.model is not None and response.model == "gpt-4o" assert response.model is not None and response.model == "gpt-4o"
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_client_side_fallbacks_list(sync_mode):
"""
Tests Client Side Fallbacks
User can pass "fallbacks": ["gpt-3.5-turbo"] and this should work
"""
router = Router(
model_list=[
{
"model_name": "bad-model",
"litellm_params": {
"model": "openai/my-bad-model",
"api_key": "my-bad-api-key",
},
},
{
"model_name": "my-good-model",
"litellm_params": {
"model": "gpt-4o",
"api_key": os.getenv("OPENAI_API_KEY"),
},
},
],
)
if sync_mode:
response = router.completion(
model="bad-model",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
fallbacks=["my-good-model"],
mock_testing_fallbacks=True,
mock_response="Hey! nice day",
)
else:
response = await router.acompletion(
model="bad-model",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
fallbacks=["my-good-model"],
mock_testing_fallbacks=True,
mock_response="Hey! nice day",
)
assert isinstance(response, litellm.ModelResponse)
assert response.model is not None and response.model == "gpt-4o"
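
As a usage note (a minimal sketch derived from the new test, without the mock flags): client-side fallbacks let the caller pass `fallbacks=[...]` on an individual request instead of configuring fallbacks on the Router. Model names and keys below are placeholders.

```python
import os
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "bad-model",
            "litellm_params": {"model": "openai/my-bad-model", "api_key": "my-bad-api-key"},
        },
        {
            "model_name": "my-good-model",
            "litellm_params": {"model": "gpt-4o", "api_key": os.getenv("OPENAI_API_KEY")},
        },
    ],
)

# Request-level fallback: if "bad-model" fails, the same call is retried on "my-good-model".
response = router.completion(
    model="bad-model",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
    fallbacks=["my-good-model"],
)
print(response.model)
```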

View file

@ -1463,6 +1463,10 @@ async def test_parallel_streaming_requests(sync_mode, model):
except RateLimitError: except RateLimitError:
pass pass
except litellm.InternalServerError as e:
if "predibase" in str(e).lower():
# only skip internal server error from predibase - their endpoint seems quite unstable
pass
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
@ -2535,7 +2539,10 @@ def streaming_and_function_calling_format_tests(idx, chunk):
return extracted_chunk, finished return extracted_chunk, finished
def test_openai_streaming_and_function_calling(): @pytest.mark.parametrize(
"model", ["gpt-3.5-turbo", "anthropic.claude-3-sonnet-20240229-v1:0"]
)
def test_streaming_and_function_calling(model):
tools = [ tools = [
{ {
"type": "function", "type": "function",
@ -2556,16 +2563,21 @@ def test_openai_streaming_and_function_calling():
}, },
} }
] ]
messages = [{"role": "user", "content": "What is the weather like in Boston?"}] messages = [{"role": "user", "content": "What is the weather like in Boston?"}]
try: try:
response = completion( litellm.set_verbose = True
model="gpt-3.5-turbo", response: litellm.CustomStreamWrapper = completion(
model=model,
tools=tools, tools=tools,
messages=messages, messages=messages,
stream=True, stream=True,
) tool_choice="required",
) # type: ignore
# Add any assertions here to check the response # Add any assertions here to check the response
for idx, chunk in enumerate(response): for idx, chunk in enumerate(response):
# continue
print("\n{}\n".format(chunk))
if idx == 0: if idx == 0:
assert ( assert (
chunk.choices[0].delta.tool_calls[0].function.arguments is not None chunk.choices[0].delta.tool_calls[0].function.arguments is not None
@ -2573,6 +2585,7 @@ def test_openai_streaming_and_function_calling():
assert isinstance( assert isinstance(
chunk.choices[0].delta.tool_calls[0].function.arguments, str chunk.choices[0].delta.tool_calls[0].function.arguments, str
) )
# assert False
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
raise e raise e

View file

@ -3990,6 +3990,7 @@ def test_async_text_completion():
asyncio.run(test_get_response()) asyncio.run(test_get_response())
@pytest.mark.skip(reason="Tgai endpoints are unstable")
def test_async_text_completion_together_ai(): def test_async_text_completion_together_ai():
litellm.set_verbose = True litellm.set_verbose = True
print("test_async_text_completion") print("test_async_text_completion")

View file

@ -187,12 +187,43 @@ def test_load_test_token_counter(model):
print("model={}, total test time={}".format(model, total_time)) print("model={}, total test time={}".format(model, total_time))
assert total_time < 10, f"Total encoding time > 10s, {total_time}" assert total_time < 10, f"Total encoding time > 10s, {total_time}"
def test_openai_token_with_image_and_text(): def test_openai_token_with_image_and_text():
model = "gpt-4o" model = "gpt-4o"
full_request = {'model': 'gpt-4o', 'tools': [{'type': 'function', 'function': {'name': 'json', 'parameters': {'type': 'object', 'required': ['clause'], 'properties': {'clause': {'type': 'string'}}}, 'description': 'Respond with a JSON object.'}}], 'logprobs': False, 'messages': [{'role': 'user', 'content': [{'text': '\n Just some long text, long long text, and you know it will be longer than 7 tokens definetly.', 'type': 'text'}]}], 'tool_choice': {'type': 'function', 'function': {'name': 'json'}}, 'exclude_models': [], 'disable_fallback': False, 'exclude_providers': []} full_request = {
"model": "gpt-4o",
"tools": [
{
"type": "function",
"function": {
"name": "json",
"parameters": {
"type": "object",
"required": ["clause"],
"properties": {"clause": {"type": "string"}},
},
"description": "Respond with a JSON object.",
},
}
],
"logprobs": False,
"messages": [
{
"role": "user",
"content": [
{
"text": "\n Just some long text, long long text, and you know it will be longer than 7 tokens definetly.",
"type": "text",
}
],
}
],
"tool_choice": {"type": "function", "function": {"name": "json"}},
"exclude_models": [],
"disable_fallback": False,
"exclude_providers": [],
}
messages = full_request.get("messages", []) messages = full_request.get("messages", [])
token_count = token_counter(model=model, messages=messages) token_count = token_counter(model=model, messages=messages)
print(token_count) print(token_count)
test_openai_token_with_image_and_text()

View file

@ -23,6 +23,7 @@ from litellm.utils import (
create_pretrained_tokenizer, create_pretrained_tokenizer,
create_tokenizer, create_tokenizer,
get_max_tokens, get_max_tokens,
get_supported_openai_params,
) )
# Assuming your trim_messages, shorten_message_to_fit_limit, and get_token_count functions are all in a module named 'message_utils' # Assuming your trim_messages, shorten_message_to_fit_limit, and get_token_count functions are all in a module named 'message_utils'
@ -386,3 +387,11 @@ def test_get_max_token_unit_test():
) # Returns a number instead of throwing an Exception ) # Returns a number instead of throwing an Exception
assert isinstance(max_tokens, int) assert isinstance(max_tokens, int)
def test_get_supported_openai_params() -> None:
# Mapped provider
assert isinstance(get_supported_openai_params("gpt-4"), list)
# Unmapped provider
assert get_supported_openai_params("nonexistent") is None
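
A small sketch of the relaxed signature this diff introduces: `custom_llm_provider` may now be omitted and is inferred via `get_llm_provider`, unmapped models return `None` instead of raising, and `request_type="embeddings"` selects the embedding-specific param list (Mistral shown below). The printed values are illustrative.

```python
from litellm.utils import get_supported_openai_params

# Provider inferred from the model name.
chat_params = get_supported_openai_params(model="gpt-4")
print(chat_params)  # list of supported OpenAI params, e.g. "temperature", "tools", ...

# Unmapped model/provider -> None instead of an exception.
print(get_supported_openai_params(model="nonexistent"))  # None

# Embedding-specific params, e.g. for Mistral's embedding endpoint.
embed_params = get_supported_openai_params(
    model="mistral-embed",
    custom_llm_provider="mistral",
    request_type="embeddings",
)
print(embed_params)
```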

View file

@ -1,5 +1,6 @@
from typing import TypedDict, Any, Union, Optional, Literal, List from typing import TypedDict, Any, Union, Optional, Literal, List
import json import json
from .openai import ChatCompletionToolCallChunk
from typing_extensions import ( from typing_extensions import (
Self, Self,
Protocol, Protocol,
@ -118,6 +119,15 @@ class ToolBlockDeltaEvent(TypedDict):
input: str input: str
class ToolUseBlockStartEvent(TypedDict):
name: str
toolUseId: str
class ContentBlockStartEvent(TypedDict, total=False):
toolUse: Optional[ToolUseBlockStartEvent]
class ContentBlockDeltaEvent(TypedDict, total=False): class ContentBlockDeltaEvent(TypedDict, total=False):
""" """
Either 'text' or 'toolUse' will be specified for Converse API streaming response. Either 'text' or 'toolUse' will be specified for Converse API streaming response.
@ -138,10 +148,11 @@ class RequestObject(TypedDict, total=False):
class GenericStreamingChunk(TypedDict): class GenericStreamingChunk(TypedDict):
text: Required[str] text: Required[str]
tool_str: Required[str] tool_use: Optional[ChatCompletionToolCallChunk]
is_finished: Required[bool] is_finished: Required[bool]
finish_reason: Required[str] finish_reason: Required[str]
usage: Optional[ConverseTokenUsageBlock] usage: Optional[ConverseTokenUsageBlock]
index: int
class Document(TypedDict): class Document(TypedDict):

View file

@ -296,14 +296,27 @@ class ListBatchRequest(TypedDict, total=False):
class ChatCompletionToolCallFunctionChunk(TypedDict): class ChatCompletionToolCallFunctionChunk(TypedDict):
name: str name: Optional[str]
arguments: str arguments: str
class ChatCompletionToolCallChunk(TypedDict): class ChatCompletionToolCallChunk(TypedDict):
id: Optional[str]
type: Literal["function"]
function: ChatCompletionToolCallFunctionChunk
class ChatCompletionDeltaToolCallChunk(TypedDict):
id: str id: str
type: Literal["function"] type: Literal["function"]
function: ChatCompletionToolCallFunctionChunk function: ChatCompletionToolCallFunctionChunk
index: int
class ChatCompletionDeltaChunk(TypedDict, total=False):
content: Optional[str]
tool_calls: List[ChatCompletionDeltaToolCallChunk]
role: str
class ChatCompletionResponseMessage(TypedDict, total=False): class ChatCompletionResponseMessage(TypedDict, total=False):

View file

@ -30,7 +30,7 @@ from dataclasses import (
dataclass, dataclass,
field, field,
) )
import os
import litellm._service_logger # for storing API inputs, outputs, and metadata import litellm._service_logger # for storing API inputs, outputs, and metadata
from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
from litellm.caching import DualCache from litellm.caching import DualCache
@ -49,9 +49,9 @@ except (ImportError, AttributeError):
filename = pkg_resources.resource_filename(__name__, "llms/tokenizers") filename = pkg_resources.resource_filename(__name__, "llms/tokenizers")
os.environ["TIKTOKEN_CACHE_DIR"] = ( os.environ["TIKTOKEN_CACHE_DIR"] = os.getenv(
filename # use local copy of tiktoken b/c of - https://github.com/BerriAI/litellm/issues/1071 "CUSTOM_TIKTOKEN_CACHE_DIR", filename
) ) # use local copy of tiktoken b/c of - https://github.com/BerriAI/litellm/issues/1071
encoding = tiktoken.get_encoding("cl100k_base") encoding = tiktoken.get_encoding("cl100k_base")
from importlib import resources from importlib import resources
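
The tokenizer cache location becomes overridable here. A minimal sketch, assuming you want tiktoken to read its files from a pre-populated directory (for example inside an offline container image); the path is a placeholder.

```python
import os

# Must be set before importing litellm: the cache directory is resolved at import time.
os.environ["CUSTOM_TIKTOKEN_CACHE_DIR"] = "/opt/tiktoken-cache"  # placeholder path

import litellm  # picks up CUSTOM_TIKTOKEN_CACHE_DIR, otherwise falls back to the bundled tokenizer files
```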
@ -63,6 +63,11 @@ claude_json_str = json.dumps(json_data)
import importlib.metadata import importlib.metadata
from ._logging import verbose_logger from ._logging import verbose_logger
from .types.router import LiteLLM_Params from .types.router import LiteLLM_Params
from .types.llms.openai import (
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionDeltaToolCallChunk,
)
from .integrations.traceloop import TraceloopLogger from .integrations.traceloop import TraceloopLogger
from .integrations.athina import AthinaLogger from .integrations.athina import AthinaLogger
from .integrations.helicone import HeliconeLogger from .integrations.helicone import HeliconeLogger
@ -933,7 +938,6 @@ class TextCompletionResponse(OpenAIObject):
object=None, object=None,
**params, **params,
): ):
if stream: if stream:
object = "text_completion.chunk" object = "text_completion.chunk"
choices = [TextChoices()] choices = [TextChoices()]
@ -942,7 +946,6 @@ class TextCompletionResponse(OpenAIObject):
if choices is not None and isinstance(choices, list): if choices is not None and isinstance(choices, list):
new_choices = [] new_choices = []
for choice in choices: for choice in choices:
if isinstance(choice, TextChoices): if isinstance(choice, TextChoices):
_new_choice = choice _new_choice = choice
elif isinstance(choice, dict): elif isinstance(choice, dict):
@ -1018,7 +1021,6 @@ class ImageObject(OpenAIObject):
revised_prompt: Optional[str] = None revised_prompt: Optional[str] = None
def __init__(self, b64_json=None, url=None, revised_prompt=None): def __init__(self, b64_json=None, url=None, revised_prompt=None):
super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt) super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt)
def __contains__(self, key): def __contains__(self, key):
@ -1342,28 +1344,29 @@ class Logging:
) )
else: else:
verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n") verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n")
# log raw request to provider (like LangFuse) # log raw request to provider (like LangFuse) -- if opted in.
try: if litellm.log_raw_request_response is True:
# [Non-blocking Extra Debug Information in metadata] try:
_litellm_params = self.model_call_details.get("litellm_params", {}) # [Non-blocking Extra Debug Information in metadata]
_metadata = _litellm_params.get("metadata", {}) or {} _litellm_params = self.model_call_details.get("litellm_params", {})
if ( _metadata = _litellm_params.get("metadata", {}) or {}
litellm.turn_off_message_logging is not None if (
and litellm.turn_off_message_logging is True litellm.turn_off_message_logging is not None
): and litellm.turn_off_message_logging is True
):
_metadata["raw_request"] = (
"redacted by litellm. \
'litellm.turn_off_message_logging=True'"
)
else:
_metadata["raw_request"] = str(curl_command)
except Exception as e:
_metadata["raw_request"] = ( _metadata["raw_request"] = (
"redacted by litellm. \ "Unable to Log \
'litellm.turn_off_message_logging=True'" raw request: {}".format(
str(e)
)
) )
else:
_metadata["raw_request"] = str(curl_command)
except Exception as e:
_metadata["raw_request"] = (
"Unable to Log \
raw request: {}".format(
str(e)
)
)
if self.logger_fn and callable(self.logger_fn): if self.logger_fn and callable(self.logger_fn):
try: try:
self.logger_fn( self.logger_fn(
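
Raw-request logging to callback metadata (the `raw_request` field forwarded to providers like Langfuse) is now opt-in rather than always on. A minimal sketch of enabling it, optionally combined with message redaction:

```python
import litellm

# Opt in: the curl-style raw request is attached to callback metadata as `raw_request`.
litellm.log_raw_request_response = True

# Optional: keep redacting message/response content in callbacks; the raw request is then
# replaced with a "redacted by litellm" placeholder instead of the real payload.
litellm.turn_off_message_logging = True
```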
@ -1621,7 +1624,6 @@ class Logging:
end_time=end_time, end_time=end_time,
) )
except Exception as e: except Exception as e:
complete_streaming_response = None complete_streaming_response = None
else: else:
self.sync_streaming_chunks.append(result) self.sync_streaming_chunks.append(result)
@ -2391,7 +2393,6 @@ class Logging:
"async_complete_streaming_response" "async_complete_streaming_response"
in self.model_call_details in self.model_call_details
): ):
await customLogger.async_log_event( await customLogger.async_log_event(
kwargs=self.model_call_details, kwargs=self.model_call_details,
response_obj=self.model_call_details[ response_obj=self.model_call_details[
@ -2730,7 +2731,7 @@ class Logging:
only redacts when litellm.turn_off_message_logging == True only redacts when litellm.turn_off_message_logging == True
""" """
# check if user opted out of logging message/response to callbacks # check if user opted out of logging message/response to callbacks
if litellm.turn_off_message_logging == True: if litellm.turn_off_message_logging is True:
# remove messages, prompts, input, response from logging # remove messages, prompts, input, response from logging
self.model_call_details["messages"] = [ self.model_call_details["messages"] = [
{"role": "user", "content": "redacted-by-litellm"} {"role": "user", "content": "redacted-by-litellm"}
@ -3250,7 +3251,7 @@ def client(original_function):
stream=kwargs.get("stream", False), stream=kwargs.get("stream", False),
) )
if kwargs.get("stream", False) == True: if kwargs.get("stream", False) is True:
cached_result = CustomStreamWrapper( cached_result = CustomStreamWrapper(
completion_stream=cached_result, completion_stream=cached_result,
model=model, model=model,
@ -4030,7 +4031,10 @@ def openai_token_counter(
""" """
print_verbose(f"LiteLLM: Utils - Counting tokens for OpenAI model={model}") print_verbose(f"LiteLLM: Utils - Counting tokens for OpenAI model={model}")
try: try:
encoding = tiktoken.encoding_for_model(model) if "gpt-4o" in model:
encoding = tiktoken.get_encoding("o200k_base")
else:
encoding = tiktoken.encoding_for_model(model)
except KeyError: except KeyError:
print_verbose("Warning: model not found. Using cl100k_base encoding.") print_verbose("Warning: model not found. Using cl100k_base encoding.")
encoding = tiktoken.get_encoding("cl100k_base") encoding = tiktoken.get_encoding("cl100k_base")
@ -4894,6 +4898,18 @@ def get_optional_params_embeddings(
) )
final_params = {**optional_params, **kwargs} final_params = {**optional_params, **kwargs}
return final_params return final_params
if custom_llm_provider == "vertex_ai":
supported_params = get_supported_openai_params(
model=model,
custom_llm_provider="vertex_ai",
request_type="embeddings",
)
_check_valid_arg(supported_params=supported_params)
optional_params = litellm.VertexAITextEmbeddingConfig().map_openai_params(
non_default_params=non_default_params, optional_params={}
)
final_params = {**optional_params, **kwargs}
return final_params
if custom_llm_provider == "vertex_ai": if custom_llm_provider == "vertex_ai":
if len(non_default_params.keys()) > 0: if len(non_default_params.keys()) > 0:
if litellm.drop_params is True: # drop the unsupported non-default values if litellm.drop_params is True: # drop the unsupported non-default values
@ -4927,7 +4943,18 @@ def get_optional_params_embeddings(
message=f"Setting user/encoding format is not supported by {custom_llm_provider}. To drop it from the call, set `litellm.drop_params = True`.", message=f"Setting user/encoding format is not supported by {custom_llm_provider}. To drop it from the call, set `litellm.drop_params = True`.",
) )
return {**non_default_params, **kwargs} return {**non_default_params, **kwargs}
if custom_llm_provider == "mistral":
supported_params = get_supported_openai_params(
model=model,
custom_llm_provider="mistral",
request_type="embeddings",
)
_check_valid_arg(supported_params=supported_params)
optional_params = litellm.MistralEmbeddingConfig().map_openai_params(
non_default_params=non_default_params, optional_params={}
)
final_params = {**optional_params, **kwargs}
return final_params
if ( if (
custom_llm_provider != "openai" custom_llm_provider != "openai"
and custom_llm_provider != "azure" and custom_llm_provider != "azure"
@ -6166,13 +6193,16 @@ def get_api_base(
if litellm.model_alias_map and model in litellm.model_alias_map: if litellm.model_alias_map and model in litellm.model_alias_map:
model = litellm.model_alias_map[model] model = litellm.model_alias_map[model]
try: try:
model, custom_llm_provider, dynamic_api_key, dynamic_api_base = ( (
get_llm_provider( model,
model=model, custom_llm_provider,
custom_llm_provider=_optional_params.custom_llm_provider, dynamic_api_key,
api_base=_optional_params.api_base, dynamic_api_base,
api_key=_optional_params.api_key, ) = get_llm_provider(
) model=model,
custom_llm_provider=_optional_params.custom_llm_provider,
api_base=_optional_params.api_base,
api_key=_optional_params.api_key,
) )
except Exception as e: except Exception as e:
verbose_logger.debug("Error occurred in getting api base - {}".format(str(e))) verbose_logger.debug("Error occurred in getting api base - {}".format(str(e)))
@ -6220,7 +6250,7 @@ def get_first_chars_messages(kwargs: dict) -> str:
def get_supported_openai_params( def get_supported_openai_params(
model: str, model: str,
custom_llm_provider: str, custom_llm_provider: Optional[str] = None,
request_type: Literal["chat_completion", "embeddings"] = "chat_completion", request_type: Literal["chat_completion", "embeddings"] = "chat_completion",
) -> Optional[list]: ) -> Optional[list]:
""" """
@ -6235,6 +6265,11 @@ def get_supported_openai_params(
- List if custom_llm_provider is mapped - List if custom_llm_provider is mapped
- None if unmapped - None if unmapped
""" """
if not custom_llm_provider:
try:
custom_llm_provider = litellm.get_llm_provider(model=model)[1]
except BadRequestError:
return None
if custom_llm_provider == "bedrock": if custom_llm_provider == "bedrock":
return litellm.AmazonConverseConfig().get_supported_openai_params(model=model) return litellm.AmazonConverseConfig().get_supported_openai_params(model=model)
elif custom_llm_provider == "ollama": elif custom_llm_provider == "ollama":
@ -6328,7 +6363,10 @@ def get_supported_openai_params(
"max_retries", "max_retries",
] ]
elif custom_llm_provider == "mistral": elif custom_llm_provider == "mistral":
return litellm.MistralConfig().get_supported_openai_params() if request_type == "chat_completion":
return litellm.MistralConfig().get_supported_openai_params()
elif request_type == "embeddings":
return litellm.MistralEmbeddingConfig().get_supported_openai_params()
elif custom_llm_provider == "replicate": elif custom_llm_provider == "replicate":
return [ return [
"stream", "stream",
@ -6370,7 +6408,10 @@ def get_supported_openai_params(
elif custom_llm_provider == "palm" or custom_llm_provider == "gemini": elif custom_llm_provider == "palm" or custom_llm_provider == "gemini":
return ["temperature", "top_p", "stream", "n", "stop", "max_tokens"] return ["temperature", "top_p", "stream", "n", "stop", "max_tokens"]
elif custom_llm_provider == "vertex_ai": elif custom_llm_provider == "vertex_ai":
return litellm.VertexAIConfig().get_supported_openai_params() if request_type == "chat_completion":
return litellm.VertexAIConfig().get_supported_openai_params()
elif request_type == "embeddings":
return litellm.VertexAITextEmbeddingConfig().get_supported_openai_params()
elif custom_llm_provider == "sagemaker": elif custom_llm_provider == "sagemaker":
return ["stream", "temperature", "max_tokens", "top_p", "stop", "n"] return ["stream", "temperature", "max_tokens", "top_p", "stop", "n"]
elif custom_llm_provider == "aleph_alpha": elif custom_llm_provider == "aleph_alpha":
@ -6577,6 +6618,9 @@ def get_llm_provider(
or get_secret("FIREWORKSAI_API_KEY") or get_secret("FIREWORKSAI_API_KEY")
or get_secret("FIREWORKS_AI_TOKEN") or get_secret("FIREWORKS_AI_TOKEN")
) )
elif custom_llm_provider == "azure_ai":
api_base = api_base or get_secret("AZURE_AI_API_BASE") # type: ignore
dynamic_api_key = api_key or get_secret("AZURE_AI_API_KEY")
elif custom_llm_provider == "mistral": elif custom_llm_provider == "mistral":
# mistral is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.mistral.ai # mistral is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.mistral.ai
api_base = ( api_base = (
@ -7458,7 +7502,6 @@ def validate_environment(model: Optional[str] = None) -> dict:
def set_callbacks(callback_list, function_id=None): def set_callbacks(callback_list, function_id=None):
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger
try: try:
@ -8767,6 +8810,13 @@ def exception_type(
response=original_exception.response, response=original_exception.response,
litellm_debug_info=extra_information, litellm_debug_info=extra_information,
) )
if "Request failed during generation" in error_str:
# this is an internal server error from predibase
raise litellm.InternalServerError(
message=f"PredibaseException - {error_str}",
llm_provider="predibase",
model=model,
)
elif hasattr(original_exception, "status_code"): elif hasattr(original_exception, "status_code"):
if original_exception.status_code == 500: if original_exception.status_code == 500:
exception_mapping_worked = True exception_mapping_worked = True
@ -9085,7 +9135,7 @@ def exception_type(
): ):
exception_mapping_worked = True exception_mapping_worked = True
raise RateLimitError( raise RateLimitError(
message=f"VertexAIException RateLimitError - {error_str}", message=f"litellm.RateLimitError: VertexAIException - {error_str}",
model=model, model=model,
llm_provider="vertex_ai", llm_provider="vertex_ai",
litellm_debug_info=extra_information, litellm_debug_info=extra_information,
@ -9097,7 +9147,14 @@ def exception_type(
), ),
), ),
) )
elif "500 Internal Server Error" in error_str:
exception_mapping_worked = True
raise ServiceUnavailableError(
message=f"litellm.ServiceUnavailableError: VertexAIException - {error_str}",
model=model,
llm_provider="vertex_ai",
litellm_debug_info=extra_information,
)
if hasattr(original_exception, "status_code"): if hasattr(original_exception, "status_code"):
if original_exception.status_code == 400: if original_exception.status_code == 400:
exception_mapping_worked = True exception_mapping_worked = True
@ -10048,6 +10105,14 @@ def get_secret(
return oidc_token return oidc_token
else: else:
raise ValueError("Github OIDC provider failed") raise ValueError("Github OIDC provider failed")
elif oidc_provider == "azure":
# https://azure.github.io/azure-workload-identity/docs/quick-start.html
azure_federated_token_file = os.getenv("AZURE_FEDERATED_TOKEN_FILE")
if azure_federated_token_file is None:
raise ValueError("AZURE_FEDERATED_TOKEN_FILE not found in environment")
with open(azure_federated_token_file, "r") as f:
oidc_token = f.read()
return oidc_token
else: else:
raise ValueError("Unsupported OIDC provider") raise ValueError("Unsupported OIDC provider")
@ -11311,7 +11376,6 @@ class CustomStreamWrapper:
raise StopIteration raise StopIteration
response_obj: GenericStreamingChunk = chunk response_obj: GenericStreamingChunk = chunk
completion_obj["content"] = response_obj["text"] completion_obj["content"] = response_obj["text"]
if response_obj["is_finished"]: if response_obj["is_finished"]:
self.received_finish_reason = response_obj["finish_reason"] self.received_finish_reason = response_obj["finish_reason"]
@ -11326,6 +11390,10 @@ class CustomStreamWrapper:
completion_tokens=response_obj["usage"]["outputTokens"], completion_tokens=response_obj["usage"]["outputTokens"],
total_tokens=response_obj["usage"]["totalTokens"], total_tokens=response_obj["usage"]["totalTokens"],
) )
if "tool_use" in response_obj and response_obj["tool_use"] is not None:
completion_obj["tool_calls"] = [response_obj["tool_use"]]
elif self.custom_llm_provider == "sagemaker": elif self.custom_llm_provider == "sagemaker":
print_verbose(f"ENTERS SAGEMAKER STREAMING for chunk {chunk}") print_verbose(f"ENTERS SAGEMAKER STREAMING for chunk {chunk}")
response_obj = self.handle_sagemaker_stream(chunk) response_obj = self.handle_sagemaker_stream(chunk)
@ -11342,7 +11410,6 @@ class CustomStreamWrapper:
new_chunk = self.completion_stream[:chunk_size] new_chunk = self.completion_stream[:chunk_size]
completion_obj["content"] = new_chunk completion_obj["content"] = new_chunk
self.completion_stream = self.completion_stream[chunk_size:] self.completion_stream = self.completion_stream[chunk_size:]
time.sleep(0.05)
elif self.custom_llm_provider == "palm": elif self.custom_llm_provider == "palm":
# fake streaming # fake streaming
response_obj = {} response_obj = {}
@ -11355,7 +11422,6 @@ class CustomStreamWrapper:
new_chunk = self.completion_stream[:chunk_size] new_chunk = self.completion_stream[:chunk_size]
completion_obj["content"] = new_chunk completion_obj["content"] = new_chunk
self.completion_stream = self.completion_stream[chunk_size:] self.completion_stream = self.completion_stream[chunk_size:]
time.sleep(0.05)
elif self.custom_llm_provider == "ollama": elif self.custom_llm_provider == "ollama":
response_obj = self.handle_ollama_stream(chunk) response_obj = self.handle_ollama_stream(chunk)
completion_obj["content"] = response_obj["text"] completion_obj["content"] = response_obj["text"]
@ -11442,7 +11508,7 @@ class CustomStreamWrapper:
# for azure, we need to pass the model from the orignal chunk # for azure, we need to pass the model from the orignal chunk
self.model = chunk.model self.model = chunk.model
response_obj = self.handle_openai_chat_completion_chunk(chunk) response_obj = self.handle_openai_chat_completion_chunk(chunk)
if response_obj == None: if response_obj is None:
return return
completion_obj["content"] = response_obj["text"] completion_obj["content"] = response_obj["text"]
print_verbose(f"completion obj content: {completion_obj['content']}") print_verbose(f"completion obj content: {completion_obj['content']}")
@ -11575,7 +11641,7 @@ class CustomStreamWrapper:
else: else:
if ( if (
self.stream_options is not None self.stream_options is not None
and self.stream_options["include_usage"] == True and self.stream_options["include_usage"] is True
): ):
return model_response return model_response
return return
@ -11600,8 +11666,14 @@ class CustomStreamWrapper:
return model_response return model_response
elif ( elif (
"content" in completion_obj "content" in completion_obj
and isinstance(completion_obj["content"], str) and (
and len(completion_obj["content"]) > 0 isinstance(completion_obj["content"], str)
and len(completion_obj["content"]) > 0
)
or (
"tool_calls" in completion_obj
and len(completion_obj["tool_calls"]) > 0
)
): # cannot set content of an OpenAI Object to be an empty string ): # cannot set content of an OpenAI Object to be an empty string
hold, model_response_str = self.check_special_tokens( hold, model_response_str = self.check_special_tokens(
chunk=completion_obj["content"], chunk=completion_obj["content"],
@ -11657,7 +11729,7 @@ class CustomStreamWrapper:
else: else:
## else ## else
completion_obj["content"] = model_response_str completion_obj["content"] = model_response_str
if self.sent_first_chunk == False: if self.sent_first_chunk is False:
completion_obj["role"] = "assistant" completion_obj["role"] = "assistant"
self.sent_first_chunk = True self.sent_first_chunk = True
model_response.choices[0].delta = Delta(**completion_obj) model_response.choices[0].delta = Delta(**completion_obj)
@ -11666,7 +11738,7 @@ class CustomStreamWrapper:
else: else:
return return
elif self.received_finish_reason is not None: elif self.received_finish_reason is not None:
if self.sent_last_chunk == True: if self.sent_last_chunk is True:
raise StopIteration raise StopIteration
# flush any remaining holding chunk # flush any remaining holding chunk
if len(self.holding_chunk) > 0: if len(self.holding_chunk) > 0:

View file

@ -1387,6 +1387,26 @@
"mode": "image_generation", "mode": "image_generation",
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
}, },
"text-embedding-004": {
"max_tokens": 3072,
"max_input_tokens": 3072,
"output_vector_size": 768,
"input_cost_per_token": 0.00000000625,
"output_cost_per_token": 0,
"litellm_provider": "vertex_ai-embedding-models",
"mode": "embedding",
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
},
"text-multilingual-embedding-002": {
"max_tokens": 2048,
"max_input_tokens": 2048,
"output_vector_size": 768,
"input_cost_per_token": 0.00000000625,
"output_cost_per_token": 0,
"litellm_provider": "vertex_ai-embedding-models",
"mode": "embedding",
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
},
"textembedding-gecko": { "textembedding-gecko": {
"max_tokens": 3072, "max_tokens": 3072,
"max_input_tokens": 3072, "max_input_tokens": 3072,

poetry.lock generated
View file

@ -1545,6 +1545,53 @@ files = [
{file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"}, {file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"},
] ]
[[package]]
name = "mypy"
version = "1.10.0"
description = "Optional static typing for Python"
optional = false
python-versions = ">=3.8"
files = [
{file = "mypy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2"},
{file = "mypy-1.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99"},
{file = "mypy-1.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e36fb078cce9904c7989b9693e41cb9711e0600139ce3970c6ef814b6ebc2b2"},
{file = "mypy-1.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2b0695d605ddcd3eb2f736cd8b4e388288c21e7de85001e9f85df9187f2b50f9"},
{file = "mypy-1.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:cd777b780312ddb135bceb9bc8722a73ec95e042f911cc279e2ec3c667076051"},
{file = "mypy-1.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3be66771aa5c97602f382230165b856c231d1277c511c9a8dd058be4784472e1"},
{file = "mypy-1.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8b2cbaca148d0754a54d44121b5825ae71868c7592a53b7292eeb0f3fdae95ee"},
{file = "mypy-1.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ec404a7cbe9fc0e92cb0e67f55ce0c025014e26d33e54d9e506a0f2d07fe5de"},
{file = "mypy-1.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e22e1527dc3d4aa94311d246b59e47f6455b8729f4968765ac1eacf9a4760bc7"},
{file = "mypy-1.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:a87dbfa85971e8d59c9cc1fcf534efe664d8949e4c0b6b44e8ca548e746a8d53"},
{file = "mypy-1.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a781f6ad4bab20eef8b65174a57e5203f4be627b46291f4589879bf4e257b97b"},
{file = "mypy-1.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b808e12113505b97d9023b0b5e0c0705a90571c6feefc6f215c1df9381256e30"},
{file = "mypy-1.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f55583b12156c399dce2df7d16f8a5095291354f1e839c252ec6c0611e86e2e"},
{file = "mypy-1.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4cf18f9d0efa1b16478c4c129eabec36148032575391095f73cae2e722fcf9d5"},
{file = "mypy-1.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:bc6ac273b23c6b82da3bb25f4136c4fd42665f17f2cd850771cb600bdd2ebeda"},
{file = "mypy-1.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9fd50226364cd2737351c79807775136b0abe084433b55b2e29181a4c3c878c0"},
{file = "mypy-1.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f90cff89eea89273727d8783fef5d4a934be2fdca11b47def50cf5d311aff727"},
{file = "mypy-1.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcfc70599efde5c67862a07a1aaf50e55bce629ace26bb19dc17cece5dd31ca4"},
{file = "mypy-1.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:075cbf81f3e134eadaf247de187bd604748171d6b79736fa9b6c9685b4083061"},
{file = "mypy-1.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:3f298531bca95ff615b6e9f2fc0333aae27fa48052903a0ac90215021cdcfa4f"},
{file = "mypy-1.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fa7ef5244615a2523b56c034becde4e9e3f9b034854c93639adb667ec9ec2976"},
{file = "mypy-1.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3236a4c8f535a0631f85f5fcdffba71c7feeef76a6002fcba7c1a8e57c8be1ec"},
{file = "mypy-1.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a2b5cdbb5dd35aa08ea9114436e0d79aceb2f38e32c21684dcf8e24e1e92821"},
{file = "mypy-1.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92f93b21c0fe73dc00abf91022234c79d793318b8a96faac147cd579c1671746"},
{file = "mypy-1.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:28d0e038361b45f099cc086d9dd99c15ff14d0188f44ac883010e172ce86c38a"},
{file = "mypy-1.10.0-py3-none-any.whl", hash = "sha256:f8c083976eb530019175aabadb60921e73b4f45736760826aa1689dda8208aee"},
{file = "mypy-1.10.0.tar.gz", hash = "sha256:3d087fcbec056c4ee34974da493a826ce316947485cef3901f511848e687c131"},
]
[package.dependencies]
mypy-extensions = ">=1.0.0"
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
typing-extensions = ">=4.1.0"
[package.extras]
dmypy = ["psutil (>=4.0)"]
install-types = ["pip"]
mypyc = ["setuptools (>=50)"]
reports = ["lxml"]
[[package]] [[package]]
name = "mypy-extensions" name = "mypy-extensions"
version = "1.0.0" version = "1.0.0"
@ -2127,6 +2174,7 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@ -3150,4 +3198,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.8.1,<4.0, !=3.9.7" python-versions = ">=3.8.1,<4.0, !=3.9.7"
content-hash = "6a37992b63b11d254f5f40687bd96898b1d9515728f663f30dcc81c4ef8df7b7" content-hash = "73054c657782120d170dc168ef07b494a916f1f810ff9c2b0ac878bd857a9dac"

View file

@ -85,6 +85,9 @@ model_list:
litellm_params: litellm_params:
model: openai/* model: openai/*
api_key: os.environ/OPENAI_API_KEY api_key: os.environ/OPENAI_API_KEY
- model_name: mistral-embed
litellm_params:
model: mistral/mistral-embed
- model_name: gpt-instruct # [PROD TEST] - tests if `/health` automatically infers this to be a text completion model - model_name: gpt-instruct # [PROD TEST] - tests if `/health` automatically infers this to be a text completion model
litellm_params: litellm_params:
model: text-completion-openai/gpt-3.5-turbo-instruct model: text-completion-openai/gpt-3.5-turbo-instruct
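
With `mistral-embed` added to the proxy test config above, an embeddings call through the proxy (mirroring the new `/embeddings` test later in this diff) would look roughly like this; the base URL and key come from the test setup, not a production deployment.

```python
import openai

# Point the OpenAI SDK at the LiteLLM proxy from the test config.
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.embeddings.create(
    model="mistral-embed",  # routed to mistral/mistral-embed by the proxy
    input=["hello world"],
)
print(len(response.data[0].embedding))
```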

View file

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "litellm" name = "litellm"
version = "1.40.8" version = "1.40.9"
description = "Library to easily interface with LLM API providers" description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"] authors = ["BerriAI"]
license = "MIT" license = "MIT"
@ -19,7 +19,7 @@ documentation = "https://docs.litellm.ai"
python = ">=3.8.1,<4.0, !=3.9.7" python = ">=3.8.1,<4.0, !=3.9.7"
openai = ">=1.27.0" openai = ">=1.27.0"
python-dotenv = ">=0.2.0" python-dotenv = ">=0.2.0"
tiktoken = ">=0.4.0" tiktoken = ">=0.7.0"
importlib-metadata = ">=6.8.0" importlib-metadata = ">=6.8.0"
tokenizers = "*" tokenizers = "*"
click = "*" click = "*"
@ -76,6 +76,7 @@ litellm = 'litellm:run_server'
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]
flake8 = "^6.1.0" flake8 = "^6.1.0"
black = "^23.12.0" black = "^23.12.0"
mypy = "^1.0"
pytest = "^7.4.3" pytest = "^7.4.3"
pytest-mock = "^3.12.0" pytest-mock = "^3.12.0"
@ -84,7 +85,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"
[tool.commitizen] [tool.commitizen]
version = "1.40.8" version = "1.40.9"
version_files = [ version_files = [
"pyproject.toml:^version" "pyproject.toml:^version"
] ]

View file

@ -34,7 +34,7 @@ opentelemetry-exporter-otlp==1.25.0
### LITELLM PACKAGE DEPENDENCIES ### LITELLM PACKAGE DEPENDENCIES
python-dotenv==1.0.0 # for env python-dotenv==1.0.0 # for env
tiktoken==0.6.0 # for calculating usage tiktoken==0.7.0 # for calculating usage
importlib-metadata==6.8.0 # for random utils importlib-metadata==6.8.0 # for random utils
tokenizers==0.14.0 # for calculating usage tokenizers==0.14.0 # for calculating usage
click==8.1.7 # for proxy cli click==8.1.7 # for proxy cli

View file

@ -91,7 +91,7 @@ model LiteLLM_TeamTable {
updated_at DateTime @default(now()) @updatedAt @map("updated_at") updated_at DateTime @default(now()) @updatedAt @map("updated_at")
model_spend Json @default("{}") model_spend Json @default("{}")
model_max_budget Json @default("{}") model_max_budget Json @default("{}")
model_id Int? @unique model_id Int? @unique // id for LiteLLM_ModelTable -> stores team-level model aliases
litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id]) litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
litellm_model_table LiteLLM_ModelTable? @relation(fields: [model_id], references: [id]) litellm_model_table LiteLLM_ModelTable? @relation(fields: [model_id], references: [id])
} }

View file

@ -664,7 +664,7 @@ async def test_key_crossing_budget():
response = await chat_completion(session=session, key=key) response = await chat_completion(session=session, key=key)
pytest.fail("Should have failed - Key crossed it's budget") pytest.fail("Should have failed - Key crossed it's budget")
except Exception as e: except Exception as e:
assert "ExceededTokenBudget: Current spend for token:" in str(e) assert "Budget has been exceeded!" in str(e)
@pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.skip(reason="AWS Suspended Account")

View file

@ -22,6 +22,7 @@ async def generate_key(
"text-embedding-ada-002", "text-embedding-ada-002",
"dall-e-2", "dall-e-2",
"fake-openai-endpoint-2", "fake-openai-endpoint-2",
"mistral-embed",
], ],
): ):
url = "http://0.0.0.0:4000/key/generate" url = "http://0.0.0.0:4000/key/generate"
@ -197,14 +198,14 @@ async def completion(session, key):
return response return response
async def embeddings(session, key): async def embeddings(session, key, model="text-embedding-ada-002"):
url = "http://0.0.0.0:4000/embeddings" url = "http://0.0.0.0:4000/embeddings"
headers = { headers = {
"Authorization": f"Bearer {key}", "Authorization": f"Bearer {key}",
"Content-Type": "application/json", "Content-Type": "application/json",
} }
data = { data = {
"model": "text-embedding-ada-002", "model": model,
"input": ["hello world"], "input": ["hello world"],
} }
@ -408,6 +409,9 @@ async def test_embeddings():
key_2 = key_gen["key"] key_2 = key_gen["key"]
await embeddings(session=session, key=key_2) await embeddings(session=session, key=key_2)
# embedding request with non OpenAI model
await embeddings(session=session, key=key, model="mistral-embed")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_image_generation(): async def test_image_generation():

View file

@ -49,7 +49,7 @@ async def new_user(
async def add_member( async def add_member(
session, i, team_id, user_id=None, user_email=None, max_budget=None session, i, team_id, user_id=None, user_email=None, max_budget=None, members=None
): ):
url = "http://0.0.0.0:4000/team/member_add" url = "http://0.0.0.0:4000/team/member_add"
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"} headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
@ -58,10 +58,13 @@ async def add_member(
data["member"]["user_email"] = user_email data["member"]["user_email"] = user_email
elif user_id is not None: elif user_id is not None:
data["member"]["user_id"] = user_id data["member"]["user_id"] = user_id
elif members is not None:
data["member"] = members
if max_budget is not None: if max_budget is not None:
data["max_budget_in_team"] = max_budget data["max_budget_in_team"] = max_budget
print("sent data: {}".format(data))
async with session.post(url, headers=headers, json=data) as response: async with session.post(url, headers=headers, json=data) as response:
status = response.status status = response.status
response_text = await response.text() response_text = await response.text()
@ -339,7 +342,7 @@ async def test_team_info():
async def test_team_update_sc_2(): async def test_team_update_sc_2():
""" """
- Create team - Create team
- Add 1 user (doesn't exist in db) - Add 3 users (doesn't exist in db)
- Change team alias - Change team alias
- Check if it works - Check if it works
- Assert team object unchanged besides team alias - Assert team object unchanged besides team alias
@ -353,15 +356,20 @@ async def test_team_update_sc_2():
{"role": "admin", "user_id": admin_user}, {"role": "admin", "user_id": admin_user},
] ]
team_data = await new_team(session=session, i=0, member_list=member_list) team_data = await new_team(session=session, i=0, member_list=member_list)
## Create new normal user ## Create 10 normal users
new_normal_user = f"krrish_{uuid.uuid4()}@berri.ai" members = [
{"role": "user", "user_id": f"krrish_{uuid.uuid4()}@berri.ai"}
for _ in range(10)
]
await add_member( await add_member(
session=session, session=session, i=0, team_id=team_data["team_id"], members=members
i=0,
team_id=team_data["team_id"],
user_id=None,
user_email=new_normal_user,
) )
## ASSERT TEAM SIZE
team_info = await get_team_info(
session=session, get_team=team_data["team_id"], call_key="sk-1234"
)
assert len(team_info["team_info"]["members_with_roles"]) == 12
## CHANGE TEAM ALIAS ## CHANGE TEAM ALIAS
@ -570,4 +578,4 @@ async def test_users_in_team_budget():
except Exception as e: except Exception as e:
print("got exception, this is expected") print("got exception, this is expected")
print(e) print(e)
assert "Crossed spend within team" in str(e) assert "Budget has been exceeded" in str(e)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
File diff not shown: a minified, generated LiteLLM Dashboard (Next.js) HTML page. The only substantive changes are the build id (`tghLG7_IS7i5OkQJRvCIl` → `48nWsJi-LJrUlOLzcK-Yz`) and the page chunk reference (`page-d61796ff0d3a8faf.js` → `page-bd882aee817406ff.js`).

View file

@ -1,7 +1,7 @@
[Generated Next.js RSC payload for the dashboard root page: the page chunk reference changes from static/chunks/app/page-d61796ff0d3a8faf.js to static/chunks/app/page-bd882aee817406ff.js and the build ID changes from tghLG7_IS7i5OkQJRvCIl to 48nWsJi-LJrUlOLzcK-Yz; the remaining payload lines are unchanged.]

File diff suppressed because one or more lines are too long

View file

@ -2,6 +2,6 @@
[Generated Next.js RSC payload for the model_hub page: only the build ID changes from tghLG7_IS7i5OkQJRvCIl to 48nWsJi-LJrUlOLzcK-Yz; all other lines, including the model_hub page chunk static/chunks/app/model_hub/page-4cb65c32467214b5.js, are unchanged.]

File diff suppressed because one or more lines are too long

View file

@ -2,6 +2,6 @@
[Generated Next.js RSC payload for the onboarding page: only the build ID changes from tghLG7_IS7i5OkQJRvCIl to 48nWsJi-LJrUlOLzcK-Yz; all other lines, including the onboarding page chunk static/chunks/app/onboarding/page-664c7288e11fff5a.js, are unchanged.]

View file

@ -119,9 +119,24 @@ const ChatUI: React.FC<ChatUIProps> = ({
        // Now, 'options' contains the list you wanted
        console.log(options); // You can log it to verify the list
-       // setModelInfo(options) should be inside the if block to avoid setting it when no data is available
-       setModelInfo(options);
+       // if options.length > 0, only store unique values
+       if (options.length > 0) {
+         const uniqueModels = Array.from(new Set(options));
+         console.log("Unique models:", uniqueModels);
+         // sort uniqueModels alphabetically
+         uniqueModels.sort((a: any, b: any) => a.label.localeCompare(b.label));
+         console.log("Model info:", modelInfo);
+         // setModelInfo(options) should be inside the if block to avoid setting it when no data is available
+         setModelInfo(uniqueModels);
+       }
        setSelectedModel(fetchedAvailableModels.data[0].id);
      }
    } catch (error) {
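
The change above filters the fetched model list down to unique entries and sorts them before storing them in state. Note that `new Set(...)` deduplicates objects by reference, so when the goal is to collapse options that share the same value, a key-based dedupe is the safer pattern. Below is a minimal standalone sketch of that idea; the `Option` shape, the `dedupeAndSortModels` helper, and the sample model names are illustrative assumptions, not part of the LiteLLM source.

```typescript
// Illustrative sketch only: the Option shape and helper name are assumptions.
interface Option {
  value: string;
  label: string;
}

// Deduplicate options by `value`, then sort alphabetically by `label`.
function dedupeAndSortModels(options: Option[]): Option[] {
  const byValue = new Map<string, Option>();
  for (const option of options) {
    if (!byValue.has(option.value)) {
      byValue.set(option.value, option); // keep the first occurrence of each value
    }
  }
  return Array.from(byValue.values()).sort((a, b) =>
    a.label.localeCompare(b.label)
  );
}

// Example usage with hypothetical entries:
const models = dedupeAndSortModels([
  { value: "gpt-4", label: "gpt-4" },
  { value: "gpt-3.5-turbo", label: "gpt-3.5-turbo" },
  { value: "gpt-4", label: "gpt-4" }, // duplicate entry from a second deployment
]);
console.log(models.map((m) => m.label)); // ["gpt-3.5-turbo", "gpt-4"]
```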

View file

@ -1130,7 +1130,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
              setSelectedAPIKey(key);
            }}
          >
-           {key["key_alias"]} (Enterpise only Feature)
+           {key["key_alias"]} (Enterprise only Feature)
          </SelectItem>
        );
      }
@ -1165,7 +1165,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
              setSelectedCustomer(user);
            }}
          >
-           {user} (Enterpise only Feature)
+           {user} (Enterprise only Feature)
          </SelectItem>
        );
      })

View file

@ -114,7 +114,7 @@ const Navbar: React.FC<NavbarProps> = ({
                textDecoration: "underline",
              }}
            >
-             Get enterpise license
+             Get enterprise license
            </a>
          </div>
        ) : null}

View file

@ -832,7 +832,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
                // @ts-ignore
                disabled={true}
              >
-               {tag} (Enterpise only Feature)
+               {tag} (Enterprise only Feature)
              </SelectItem>
            );
          })}