Merge branch 'main' into feat/friendliai

This commit is contained in:
Wonseok Lee (Jack) 2024-06-13 09:59:56 +09:00 committed by GitHub
commit 776c75c1e5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
99 changed files with 202794 additions and 632 deletions

View file

@ -202,6 +202,7 @@ jobs:
-e REDIS_PORT=$REDIS_PORT \
-e AZURE_FRANCE_API_KEY=$AZURE_FRANCE_API_KEY \
-e AZURE_EUROPE_API_KEY=$AZURE_EUROPE_API_KEY \
-e MISTRAL_API_KEY=$MISTRAL_API_KEY \
-e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-e AWS_REGION_NAME=$AWS_REGION_NAME \

View file

@ -150,7 +150,7 @@ $ litellm --config /path/to/config.yaml
```bash
curl "http://0.0.0.0:4000/v1/assistants?order=desc&limit=20" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-H "Authorization: Bearer sk-1234"
```
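The same listing call can be made with the OpenAI SDK pointed at the proxy; a minimal sketch (the key and URL mirror the curl example above):
```python
import openai

# point the OpenAI client at the LiteLLM proxy
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# list assistants, newest first
assistants = client.beta.assistants.list(order="desc", limit=20)
for assistant in assistants.data:
    print(assistant.id)
```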
**Create a Thread**
@ -162,6 +162,14 @@ curl http://0.0.0.0:4000/v1/threads \
-d ''
```
**Get a Thread**
```bash
curl http://0.0.0.0:4000/v1/threads/{thread_id} \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234"
```
**Add Messages to the Thread**
```bash

View file

@ -212,6 +212,94 @@ If you run the code two times, response1 will use the cache from the first run t
</TabItem>
</Tabs>
## Switch Cache On / Off Per LiteLLM Call
LiteLLM supports 4 cache-controls:
- `no-cache`: *Optional(bool)* When `True`, will not return a cached response; the actual endpoint is called instead.
- `no-store`: *Optional(bool)* When `True`, will not cache the response.
- `ttl`: *Optional(int)* Will cache the response for the user-defined amount of time (in seconds).
- `s-maxage`: *Optional(int)* Will only accept cached responses that are within the user-defined age (in seconds).
[Let us know if you need more](https://github.com/BerriAI/litellm/issues/1218)
<Tabs>
<TabItem value="no-cache" label="No-Cache">
Example usage `no-cache` - when `True`, will not return a cached response
```python
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[
{
"role": "user",
"content": "hello who are you"
}
],
cache={"no-cache": True},
)
```
</TabItem>
<TabItem value="no-store" label="No-Store">
Example usage `no-store` - when `True`, will not cache the response.
```python
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[
{
"role": "user",
"content": "hello who are you"
}
],
cache={"no-store": True},
)
```
</TabItem>
<TabItem value="ttl" label="ttl">
Example usage `ttl` - cache the response for 10 seconds
```python
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[
{
"role": "user",
"content": "hello who are you"
}
],
cache={"ttl": 10},
)
```
</TabItem>
<TabItem value="s-maxage" label="s-maxage">
Example usage `s-maxage` - will only accept cached responses no older than 60 seconds
```python
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[
{
"role": "user",
"content": "hello who are you"
}
],
cache={"s-maxage": 60},
)
```
</TabItem>
</Tabs>
## Cache Context Manager - Enable, Disable, Update Cache

View file

@ -0,0 +1,46 @@
import Image from '@theme/IdealImage';
# Raw Request/Response Logging
See the raw request/response sent by LiteLLM in your logging provider (OTEL/Langfuse/etc.).
**on SDK**
```python
# pip install langfuse
import litellm
import os
# log raw request/response
litellm.log_raw_request_response = True
# from https://cloud.langfuse.com/
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
# Optional, defaults to https://cloud.langfuse.com
os.environ["LANGFUSE_HOST"] # optional
# LLM API Keys
os.environ['OPENAI_API_KEY']=""
# set langfuse as a callback, litellm will send the data to langfuse
litellm.success_callback = ["langfuse"]
# openai call
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[
{"role": "user", "content": "Hi 👋 - i'm openai"}
]
)
```
**on Proxy**
```yaml
litellm_settings:
log_raw_request_response: True
```
**Expected Log**
<Image img={require('../../img/raw_request_log.png')}/>

View file

@ -1,3 +1,5 @@
# llmcord.py
llmcord.py lets you and your friends chat with LLMs directly in your Discord server. It works with practically any LLM, remote or locally hosted.
Github: https://github.com/jakobdylanc/discord-llm-chatbot

View file

@ -11,7 +11,7 @@ LiteLLM supports
:::info
Anthropic API fails requests when `max_tokens` are not passed. Due to this litellm passes `max_tokens=4096` when no `max_tokens` are passed
Anthropic API fails requests when `max_tokens` are not passed. Due to this litellm passes `max_tokens=4096` when no `max_tokens` are passed.
:::
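To override the default, set `max_tokens` explicitly; a minimal sketch:
```python
import os
from litellm import completion

os.environ["ANTHROPIC_API_KEY"] = ""

# passing max_tokens explicitly overrides the 4096 default litellm injects
response = completion(
    model="anthropic/claude-3-opus-20240229",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
    max_tokens=1024,
)
```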
@ -229,17 +229,6 @@ assert isinstance(
```
### Setting `anthropic-beta` Header in Requests
Pass the `extra_headers` param to litellm; all headers will be forwarded to the Anthropic API
```python
response = completion(
model="anthropic/claude-3-opus-20240229",
messages=messages,
tools=tools,
)
```
### Forcing Anthropic Tool Use

View file

@ -3,53 +3,155 @@ import TabItem from '@theme/TabItem';
# Azure AI Studio
**Ensure the following:**
1. The API Base passed ends in `/v1/`
example:
```python
api_base = "https://Mistral-large-dfgfj-serverless.eastus2.inference.ai.azure.com/v1/"
```
LiteLLM supports all models on Azure AI Studio
2. The `model` passed is listed in [supported models](#supported-models). You **DO NOT** need to pass your deployment name to litellm. Example: `model=azure/Mistral-large-nmefg`
## Usage
<Tabs>
<TabItem value="sdk" label="SDK">
### ENV VAR
```python
import litellm
response = litellm.completion(
model="azure/command-r-plus",
api_base="<your-deployment-base>/v1/"
api_key="eskk******"
messages=[{"role": "user", "content": "What is the meaning of life?"}],
import os
os.environ["AZURE_API_API_KEY"] = ""
os.environ["AZURE_AI_API_BASE"] = ""
```
### Example Call
```python
from litellm import completion
import os
## set ENV variables
os.environ["AZURE_API_API_KEY"] = "azure ai key"
os.environ["AZURE_AI_API_BASE"] = "azure ai base url" # e.g.: https://Mistral-large-dfgfj-serverless.eastus2.inference.ai.azure.com/
# azure ai command-r-plus call
response = completion(
model="azure_ai/command-r-plus",
messages = [{ "content": "Hello, how are you?","role": "user"}]
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
## Sample Usage - LiteLLM Proxy
1. Add models to your config.yaml
```yaml
model_list:
- model_name: mistral
litellm_params:
model: azure/mistral-large-latest
api_base: https://Mistral-large-dfgfj-serverless.eastus2.inference.ai.azure.com/v1/
api_key: JGbKodRcTp****
- model_name: command-r-plus
litellm_params:
model: azure/command-r-plus
api_key: os.environ/AZURE_COHERE_API_KEY
api_base: os.environ/AZURE_COHERE_API_BASE
model: azure_ai/command-r-plus
api_key: os.environ/AZURE_AI_API_KEY
api_base: os.environ/AZURE_AI_API_BASE
```
2. Start the proxy
```bash
$ litellm --config /path/to/config.yaml --debug
```
3. Send Request to LiteLLM Proxy Server
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
```python
import openai
client = openai.OpenAI(
api_key="sk-1234", # pass litellm proxy key, if you're using virtual keys
base_url="http://0.0.0.0:4000" # litellm-proxy-base url
)
response = client.chat.completions.create(
model="command-r-plus",
messages = [
{
"role": "system",
"content": "Be a good human!"
},
{
"role": "user",
"content": "What do you know about earth?"
}
]
)
print(response)
```
</TabItem>
<TabItem value="curl" label="curl">
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"model": "command-r-plus",
"messages": [
{
"role": "system",
"content": "Be a good human!"
},
{
"role": "user",
"content": "What do you know about earth?"
}
],
}'
```
</TabItem>
</Tabs>
</TabItem>
</Tabs>
## Passing additional params - max_tokens, temperature
See all litellm.completion supported params [here](../completion/input.md#translated-openai-params)
```python
# !pip install litellm
from litellm import completion
import os
## set ENV variables
os.environ["AZURE_AI_API_KEY"] = "azure ai api key"
os.environ["AZURE_AI_API_BASE"] = "azure ai api base"
# command r plus call
response = completion(
model="azure_ai/command-r-plus",
messages = [{ "content": "Hello, how are you?","role": "user"}],
max_tokens=20,
temperature=0.5
)
```
**proxy**
```yaml
model_list:
- model_name: command-r-plus
litellm_params:
model: azure_ai/command-r-plus
api_key: os.environ/AZURE_AI_API_KEY
api_base: os.environ/AZURE_AI_API_BASE
max_tokens: 20
temperature: 0.5
```
2. Start the proxy
```bash
@ -103,9 +205,6 @@ response = litellm.completion(
</Tabs>
</TabItem>
</Tabs>
## Function Calling
<Tabs>
@ -115,8 +214,8 @@ response = litellm.completion(
from litellm import completion
# set env
os.environ["AZURE_MISTRAL_API_KEY"] = "your-api-key"
os.environ["AZURE_MISTRAL_API_BASE"] = "your-api-base"
os.environ["AZURE_AI_API_KEY"] = "your-api-key"
os.environ["AZURE_AI_API_BASE"] = "your-api-base"
tools = [
{
@ -141,9 +240,7 @@ tools = [
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
response = completion(
model="azure/mistral-large-latest",
api_base=os.getenv("AZURE_MISTRAL_API_BASE")
api_key=os.getenv("AZURE_MISTRAL_API_KEY")
model="azure_ai/mistral-large-latest",
messages=messages,
tools=tools,
tool_choice="auto",
@ -206,10 +303,12 @@ curl http://0.0.0.0:4000/v1/chat/completions \
## Supported Models
LiteLLM supports **ALL** azure ai models. Here are a few examples:
| Model Name | Function Call |
|--------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Cohere command-r-plus | `completion(model="azure/command-r-plus", messages)` |
| Cohere ommand-r | `completion(model="azure/command-r", messages)` |
| Cohere command-r | `completion(model="azure/command-r", messages)` |
| mistral-large-latest | `completion(model="azure/mistral-large-latest", messages)` |

View file

@ -144,16 +144,135 @@ print(response)
</TabItem>
</Tabs>
## Set temperature, top p, etc.
<Tabs>
<TabItem value="sdk" label="SDK">
```python
import os
from litellm import completion
os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""
response = completion(
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
messages=[{ "content": "Hello, how are you?","role": "user"}],
temperature=0.7,
top_p=1
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
**Set on yaml**
```yaml
model_list:
- model_name: bedrock-claude-v1
litellm_params:
model: bedrock/anthropic.claude-instant-v1
temperature: <your-temp>
top_p: <your-top-p>
```
**Set on request**
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="bedrock-claude-v1", messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
temperature=0.7,
top_p=1
)
print(response)
```
</TabItem>
</Tabs>
## Pass provider-specific params
If you pass a non-openai param to litellm, we'll assume it's provider-specific and send it as a kwarg in the request body. [See more](../completion/input.md#provider-specific-params)
<Tabs>
<TabItem value="sdk" label="SDK">
```python
import os
from litellm import completion
os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""
response = completion(
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
messages=[{ "content": "Hello, how are you?","role": "user"}],
top_k=1 # 👈 PROVIDER-SPECIFIC PARAM
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
**Set on yaml**
```yaml
model_list:
- model_name: bedrock-claude-v1
litellm_params:
model: bedrock/anthropic.claude-instant-v1
top_k: 1 # 👈 PROVIDER-SPECIFIC PARAM
```
**Set on request**
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="bedrock-claude-v1", messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
temperature=0.7,
extra_body={
top_k=1 # 👈 PROVIDER-SPECIFIC PARAM
}
)
print(response)
```
</TabItem>
</Tabs>
## Usage - Function Calling
:::info
Claude returns its output as an XML Tree. [Here is how we translate it](https://github.com/BerriAI/litellm/blob/49642a5b00a53b1babc1a753426a8afcac85dbbe/litellm/llms/prompt_templates/factory.py#L734).
You can see the raw response via `response._hidden_params["original_response"]`.
Claude can hallucinate the XML structure, e.g. returning the list param `value` as `<value>\n<item>apple</item>\n<item>banana</item>\n</value>` or `<value>\n<list>\n<item>apple</item>\n<item>banana</item>\n</list>\n</value>`.
:::
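A minimal sketch of pulling that raw response after a tool-call request (model and tool shape taken from the examples on this page):
```python
import os
from litellm import completion

os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""

tools = [{
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {"location": {"type": "string"}},
            "required": ["location"],
        },
    },
}]

response = completion(
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    messages=[{"role": "user", "content": "What's the weather like in Boston today?"}],
    tools=tools,
)

# the provider's raw response, before litellm translates it to the OpenAI format
print(response._hidden_params["original_response"])
```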
LiteLLM uses Bedrock's Converse API for making tool calls
```python
from litellm import completion
@ -361,47 +480,6 @@ response = completion(
)
```
### Passing an external BedrockRuntime.Client as a parameter - Completion()
Pass an external BedrockRuntime.Client object as a parameter to litellm.completion. Useful when using an AWS credentials profile, SSO session, assumed role session, or if environment variables are not available for auth.
Create a client from session credentials:
```python
import boto3
from litellm import completion
bedrock = boto3.client(
service_name="bedrock-runtime",
region_name="us-east-1",
aws_access_key_id="",
aws_secret_access_key="",
aws_session_token="",
)
response = completion(
model="bedrock/anthropic.claude-instant-v1",
messages=[{ "content": "Hello, how are you?","role": "user"}],
aws_bedrock_client=bedrock,
)
```
Create a client from AWS profile in `~/.aws/config`:
```python
import boto3
from litellm import completion
dev_session = boto3.Session(profile_name="dev-profile")
bedrock = dev_session.client(
service_name="bedrock-runtime",
region_name="us-east-1",
)
response = completion(
model="bedrock/anthropic.claude-instant-v1",
messages=[{ "content": "Hello, how are you?","role": "user"}],
aws_bedrock_client=bedrock,
)
```
### SSO Login (AWS Profile)
- Set `AWS_PROFILE` environment variable
- Make bedrock completion call (sketched below)
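A minimal sketch, assuming a profile named `dev-profile` is configured in `~/.aws/config`:
```python
import os
from litellm import completion

# boto3 resolves credentials from the named profile (SSO or static)
os.environ["AWS_PROFILE"] = "dev-profile"

response = completion(
    model="bedrock/anthropic.claude-instant-v1",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
)
print(response)
```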
@ -464,6 +542,56 @@ response = completion(
)
```
### Passing an external BedrockRuntime.Client as a parameter - Completion()
:::warning
This is a deprecated flow. Boto3 is not async, and `boto3.client` does not let us make the HTTP call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
:::
Pass an external BedrockRuntime.Client object as a parameter to litellm.completion. Useful when using an AWS credentials profile, SSO session, assumed role session, or if environment variables are not available for auth.
Create a client from session credentials:
```python
import boto3
from litellm import completion
bedrock = boto3.client(
service_name="bedrock-runtime",
region_name="us-east-1",
aws_access_key_id="",
aws_secret_access_key="",
aws_session_token="",
)
response = completion(
model="bedrock/anthropic.claude-instant-v1",
messages=[{ "content": "Hello, how are you?","role": "user"}],
aws_bedrock_client=bedrock,
)
```
Create a client from AWS profile in `~/.aws/config`:
```python
import boto3
from litellm import completion
dev_session = boto3.Session(profile_name="dev-profile")
bedrock = dev_session.client(
service_name="bedrock-runtime",
region_name="us-east-1",
)
response = completion(
model="bedrock/anthropic.claude-instant-v1",
messages=[{ "content": "Hello, how are you?","role": "user"}],
aws_bedrock_client=bedrock,
)
```
## Provisioned throughput models
To use provisioned throughput Bedrock models pass
- `model=bedrock/<base-model>`, example `model=bedrock/anthropic.claude-v2`. Set `model` to any of the [Supported AWS models](#supported-aws-bedrock-models)

View file

@ -1,10 +1,13 @@
# 🆕 Clarifai
# Clarifai
Anthropic, OpenAI, Mistral, Llama and Gemini LLMs are supported on Clarifai.
:::warning
Streaming is not yet supported when using Clarifai with LiteLLM. Tracking support here: https://github.com/BerriAI/litellm/issues/4162
:::
## Pre-Requisites
`pip install clarifai`
`pip install litellm`
## Required Environment Variables
@ -12,6 +15,7 @@ To obtain your Clarifai Personal access token follow this [link](https://docs.cl
```python
os.environ["CLARIFAI_API_KEY"] = "YOUR_CLARIFAI_PAT" # CLARIFAI_PAT
```
## Usage
@ -68,7 +72,7 @@ Example Usage - Note: liteLLM supports all models deployed on Clarifai
| clarifai/meta.Llama-2.codeLlama-70b-Python | `completion('clarifai/meta.Llama-2.codeLlama-70b-Python', messages)`|
| clarifai/meta.Llama-2.codeLlama-70b-Instruct | `completion('clarifai/meta.Llama-2.codeLlama-70b-Instruct', messages)` |
## Mistal LLMs
## Mistral LLMs
| Model Name | Function Call |
|---------------------------------------------|------------------------------------------------------------------------|
| clarifai/mistralai.completion.mixtral-8x22B | `completion('clarifai/mistralai.completion.mixtral-8x22B', messages)` |

View file

@ -125,11 +125,12 @@ See all litellm.completion supported params [here](../completion/input.md#transl
from litellm import completion
import os
## set ENV variables
os.environ["PREDIBASE_API_KEY"] = "predibase key"
os.environ["DATABRICKS_API_KEY"] = "databricks key"
os.environ["DATABRICKS_API_BASE"] = "databricks api base"
# predibae llama-3 call
# databricks dbrx call
response = completion(
model="predibase/llama3-8b-instruct",
model="databricks/databricks-dbrx-instruct",
messages = [{ "content": "Hello, how are you?","role": "user"}],
max_tokens=20,
temperature=0.5

View file

@ -449,6 +449,54 @@ print(response)
</TabItem>
</Tabs>
## Usage - Function Calling
LiteLLM supports Function Calling for Vertex AI gemini models.
```python
from litellm import completion
import os
# set env
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ".."
os.environ["VERTEX_AI_PROJECT"] = ".."
os.environ["VERTEX_AI_LOCATION"] = ".."
tools = [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
},
}
]
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
response = completion(
model="vertex_ai/gemini-pro-vision",
messages=messages,
tools=tools,
)
# Add any assertions, here to check response args
print(response)
assert isinstance(response.choices[0].message.tool_calls[0].function.name, str)
assert isinstance(
response.choices[0].message.tool_calls[0].function.arguments, str
)
```
## Chat Models
| Model Name | Function Call |
@ -500,6 +548,8 @@ All models listed [here](https://github.com/BerriAI/litellm/blob/57f37f743886a02
| Model Name | Function Call |
|--------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| text-embedding-004 | `embedding(model="vertex_ai/text-embedding-004", input)` |
| text-multilingual-embedding-002 | `embedding(model="vertex_ai/text-multilingual-embedding-002", input)` |
| textembedding-gecko | `embedding(model="vertex_ai/textembedding-gecko", input)` |
| textembedding-gecko-multilingual | `embedding(model="vertex_ai/textembedding-gecko-multilingual", input)` |
| textembedding-gecko-multilingual@001 | `embedding(model="vertex_ai/textembedding-gecko-multilingual@001", input)` |
@ -508,6 +558,29 @@ All models listed [here](https://github.com/BerriAI/litellm/blob/57f37f743886a02
| text-embedding-preview-0409 | `embedding(model="vertex_ai/text-embedding-preview-0409", input)` |
| text-multilingual-embedding-preview-0409 | `embedding(model="vertex_ai/text-multilingual-embedding-preview-0409", input)` |
### Advanced Use `task_type` and `title` (Vertex Specific Params)
👉 `task_type` and `title` are vertex specific params
LiteLLM Supported Vertex Specific Params
```python
auto_truncate: Optional[bool] = None
task_type: Optional[Literal["RETRIEVAL_QUERY","RETRIEVAL_DOCUMENT", "SEMANTIC_SIMILARITY", "CLASSIFICATION", "CLUSTERING", "QUESTION_ANSWERING", "FACT_VERIFICATION"]] = None
title: Optional[str] = None # The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
```
**Example Usage with LiteLLM**
```python
response = litellm.embedding(
model="vertex_ai/text-embedding-004",
input=["good morning from litellm", "gm"]
task_type = "RETRIEVAL_DOCUMENT",
dimensions=1,
auto_truncate=True,
)
```
## Image Generation Models
Usage

View file

@ -138,14 +138,22 @@ Navigate to the Usage Tab on the LiteLLM UI (found on https://your-proxy-endpoin
<Image img={require('../../img/admin_ui_spend.png')} />
## API Endpoints to get Spend
#### Getting Spend Reports - To Charge Other Teams, API Keys
#### Getting Spend Reports - To Charge Other Teams, Customers
Use the `/global/spend/report` endpoint to get daily spend per team, with a breakdown of spend per API Key, Model
Use the `/global/spend/report` endpoint to get daily spend report per
- team
- customer [this is `user` passed to `/chat/completions` request](#how-to-track-spend-with-litellm)
<Tabs>
<TabItem value="per team" label="Spend Per Team">
##### Example Request
👉 Key Change: Specify `group_by=team`
```shell
curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30' \
curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30&group_by=team' \
-H 'Authorization: Bearer sk-1234'
```
@ -254,6 +262,69 @@ Output from script
```
</TabItem>
</Tabs>
</TabItem>
<TabItem value="per customer" label="Spend Per Customer">
##### Example Request
👉 Key Change: Specify `group_by=customer`
```shell
curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30&group_by=customer' \
-H 'Authorization: Bearer sk-1234'
```
##### Example Response
```shell
[
{
"group_by_day": "2024-04-30T00:00:00+00:00",
"customers": [
{
"customer": "palantir",
"total_spend": 0.0015265,
"metadata": [ # see the spend by unique(key + model)
{
"model": "gpt-4",
"spend": 0.00123,
"total_tokens": 28,
"api_key": "88dc28.." # the hashed api key
},
{
"model": "gpt-4",
"spend": 0.00123,
"total_tokens": 28,
"api_key": "a73dc2.." # the hashed api key
},
{
"model": "chatgpt-v-2",
"spend": 0.000214,
"total_tokens": 122,
"api_key": "898c28.." # the hashed api key
},
{
"model": "gpt-3.5-turbo",
"spend": 0.0000825,
"total_tokens": 85,
"api_key": "84dc28.." # the hashed api key
}
]
}
]
}
]
```
</TabItem>
</Tabs>
@ -356,4 +427,23 @@ model_list:
## Custom Input/Output Pricing
👉 Head to [Custom Input/Output Pricing](https://docs.litellm.ai/docs/proxy/custom_pricing) to set up custom pricing for your models
## ✨ Custom k,v pairs
Log specific key,value pairs as part of the metadata for a spend log
:::info
Logging specific key,value pairs in spend logs metadata is an enterprise feature. [See here](./enterprise.md#tracking-spend-with-custom-metadata)
:::
## ✨ Custom Tags
:::info
Tracking spend with Custom tags is an enterprise feature. [See here](./enterprise.md#tracking-spend-for-custom-tags)
:::

View file

@ -42,6 +42,14 @@ Set `JSON_LOGS="True"` in your env:
```bash
export JSON_LOGS="True"
```
**OR**
Set `json_logs: true` in your yaml:
```yaml
litellm_settings:
json_logs: true
```
Start proxy
@ -49,4 +57,35 @@ Start proxy
$ litellm
```
The proxy will now write all logs in JSON format.
## Control Log Output
Turn off FastAPI's default 'INFO' logs
1. Turn on 'json logs'
```yaml
litellm_settings:
json_logs: true
```
2. Set `LITELLM_LOG` to 'ERROR'
Only get logs if an error occurs.
```bash
LITELLM_LOG="ERROR"
```
3. Start proxy
```bash
$ litellm
```
Expected Output:
```bash
# no info statements
```

View file

@ -1,5 +1,6 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import Image from '@theme/IdealImage';
# 🐳 Docker, Deploying LiteLLM Proxy
@ -537,7 +538,9 @@ ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml
## Advanced Deployment Settings
### Customization of the server root path
### 1. Customization of the server root path (custom Proxy base url)
💥 Use this when you want to serve LiteLLM on a custom base url path like `https://localhost:4000/api/v1`
:::info
@ -548,9 +551,29 @@ In a Kubernetes deployment, it's possible to utilize a shared DNS to host multip
Customize the root path to eliminate the need for employing multiple DNS configurations during deployment.
👉 Set `SERVER_ROOT_PATH` in your .env and this will be set as your server root path
```
export SERVER_ROOT_PATH="/api/v1"
```
**Step 1. Run Proxy with `SERVER_ROOT_PATH` set in your env**
### Setting SSL Certification
```shell
docker run --name litellm-proxy \
-e DATABASE_URL=postgresql://<user>:<password>@<host>:<port>/<dbname> \
-e SERVER_ROOT_PATH="/api/v1" \
-p 4000:4000 \
ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml
```
After running the proxy you can access it on `http://0.0.0.0:4000/api/v1/` (since we set `SERVER_ROOT_PATH="/api/v1"`)
**Step 2. Verify Running on correct path**
<Image img={require('../../img/custom_root_path.png')} />
**That's it**, that's all you need to run the proxy on a custom root path
### 2. Setting SSL Certification
Use this if you need to set SSL certificates for your on-prem litellm proxy

View file

@ -205,6 +205,146 @@ curl -X GET "http://0.0.0.0:4000/spend/tags" \
```
## Tracking Spend with custom metadata
Requirements:
- Virtual Keys & a database should be set up, see [virtual keys](https://docs.litellm.ai/docs/proxy/virtual_keys)
#### Usage - /chat/completions requests with special spend logs metadata
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
Set `extra_body={"metadata": { }}` to `metadata` you want to pass
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
extra_body={
"metadata": {
"spend_logs_metadata": {
"hello": "world"
}
}
}
)
print(response)
```
</TabItem>
<TabItem value="Curl" label="Curl Request">
Pass `metadata` as part of the request body
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "user",
"content": "what llm are you"
}
],
"metadata": {
"spend_logs_metadata": {
"hello": "world"
}
}
}'
```
</TabItem>
<TabItem value="langchain" label="Langchain">
```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
chat = ChatOpenAI(
openai_api_base="http://0.0.0.0:4000",
model = "gpt-3.5-turbo",
temperature=0.1,
extra_body={
"metadata": {
"spend_logs_metadata": {
"hello": "world"
}
}
}
)
messages = [
SystemMessage(
content="You are a helpful assistant that im using to make a test request to."
),
HumanMessage(
content="test from litellm. tell me why it's amazing in 1 sentence"
),
]
response = chat(messages)
print(response)
```
</TabItem>
</Tabs>
#### Viewing Spend w/ custom metadata
#### `/spend/logs` Request Format
```bash
curl -X GET "http://0.0.0.0:4000/spend/logs?request_id=<your-call-id" \ # e.g.: chatcmpl-9ZKMURhVYSi9D6r6PJ9vLcayIK0Vm
-H "Authorization: Bearer sk-1234"
```
#### `/spend/logs` Response Format
```bash
[
{
"request_id": "chatcmpl-9ZKMURhVYSi9D6r6PJ9vLcayIK0Vm",
"call_type": "acompletion",
"metadata": {
"user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
"user_api_key_alias": null,
"spend_logs_metadata": { # 👈 LOGGED CUSTOM METADATA
"hello": "world"
},
"user_api_key_team_id": null,
"user_api_key_user_id": "116544810872468347480",
"user_api_key_team_alias": null
},
}
]
```
## Enforce Required Params for LLM Requests
Use this when you want to enforce that all requests include certain params, e.g. requiring every request to include the `user` and `["metadata"]["generation_name"]` params.
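As a sketch, a request that satisfies such a policy might look like this (the enforcement config itself is not shown in this hunk; the param names below follow the example above):
```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# include the enforced params: `user` and metadata.generation_name
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this is a test request"}],
    user="user-1234",
    extra_body={"metadata": {"generation_name": "my-generation"}},
)
print(response)
```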

View file

@ -606,6 +606,52 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
** 🎉 Expect to see this trace logged in your OTEL collector**
### Context propagation across Services `Traceparent HTTP Header`
❓ Use this when you want to **pass information about the incoming request in a distributed tracing system**
✅ Key change: Pass the **`traceparent` header** in your requests. [Read more about traceparent headers here](https://uptrace.dev/opentelemetry/opentelemetry-traceparent.html#what-is-traceparent-header)
```curl
traceparent: 00-80e1afed08e019fc1110464cfa66635c-7a085853722dc6d2-01
```
Example Usage
1. Make Request to LiteLLM Proxy with `traceparent` header
```python
import openai
import uuid
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
example_traceparent = f"00-80e1afed08e019fc1110464cfa66635c-02e80198930058d4-01"
extra_headers = {
"traceparent": example_traceparent
}
_trace_id = example_traceparent.split("-")[1]
print("EXTRA HEADERS: ", extra_headers)
print("Trace ID: ", _trace_id)
response = client.chat.completions.create(
model="llama3",
messages=[
{"role": "user", "content": "this is a test request, write a short poem"}
],
extra_headers=extra_headers,
)
print(response)
```
```shell
# EXTRA HEADERS: {'traceparent': '00-80e1afed08e019fc1110464cfa66635c-02e80198930058d4-01'}
# Trace ID: 80e1afed08e019fc1110464cfa66635c
```
2. Lookup Trace ID on OTEL Logger
Search for Trace=`80e1afed08e019fc1110464cfa66635c` on your OTEL Collector
<Image img={require('../../img/otel_parent.png')} />

View file

@ -21,6 +21,7 @@ general_settings:
litellm_settings:
set_verbose: False # Switch off Debug Logging, ensure your logs do not have any debugging on
json_logs: true # Get debug logs in json format
```
Set slack webhook url in your env
@ -28,6 +29,11 @@ Set slack webhook url in your env
export SLACK_WEBHOOK_URL="https://hooks.slack.com/services/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"
```
Turn off FastAPI's default info logs
```bash
export LITELLM_LOG="ERROR"
```
:::info
Need help or want dedicated support? Talk to a founder [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat).

View file

@ -2,18 +2,13 @@ import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# 🔥 Fallbacks, Retries, Timeouts, Load Balancing
# 🔥 Load Balancing, Fallbacks, Retries, Timeouts
Retry call with multiple instances of the same model.
If a call fails after num_retries, fall back to another model group.
If the error is a context window exceeded error, fall back to a larger model group (if given).
[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/router.py)
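Under the hood this is litellm's Router. A minimal SDK-side sketch of the same behavior (model names, keys, and bases are placeholders):
```python
from litellm import Router

# two deployments of the same model group, plus a fallback group
router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "azure/gpt-turbo-small-eu", "api_base": "...", "api_key": "..."},
        },
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "azure/gpt-turbo-small-ca", "api_base": "...", "api_key": "..."},
        },
        {"model_name": "gpt-4", "litellm_params": {"model": "gpt-4", "api_key": "..."}},
    ],
    num_retries=2,                             # retry across deployments before failing
    fallbacks=[{"gpt-3.5-turbo": ["gpt-4"]}],  # then fall back to another model group
)

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
)
print(response)
```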
- Quick Start [load balancing](#test---load-balancing)
- Quick Start [client side fallbacks](#test---client-side-fallbacks)
## Quick Start - Load Balancing
### Step 1 - Set deployments on config
#### Step 1 - Set deployments on config
**Example config below**. Here requests with `model=gpt-3.5-turbo` will be routed across multiple instances of `azure/gpt-3.5-turbo`
```yaml
@ -38,50 +33,214 @@ model_list:
rpm: 1440
```
### Step 2: Start Proxy with config
#### Step 2: Start Proxy with config
```shell
$ litellm --config /path/to/config.yaml
```
### Step 3: Use proxy - Call a model group [Load Balancing]
Curl Command
### Test - Load Balancing
Here requests with `model=gpt-3.5-turbo` will be routed across multiple instances of `azure/gpt-3.5-turbo`
👉 Key Change: `model="gpt-3.5-turbo"`
**Check the `model_id` in Response Headers to make sure the requests are being load balanced**
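A sketch of reading that header with the OpenAI SDK's raw-response interface (the header name `x-litellm-model-id` is an assumption here; check your proxy's response headers):
```python
import openai

client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:4000")

# use the raw-response interface so response headers are accessible
raw = client.chat.completions.with_raw_response.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
)

# each deployment has a unique id; it should vary across repeated calls
print(raw.headers.get("x-litellm-model-id"))
print(raw.parse())  # the usual ChatCompletion object
```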
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
]
)
print(response)
```
</TabItem>
<TabItem value="Curl" label="Curl Request">
Send the same request body via curl
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "gpt-3.5-turbo",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ]
}'
```
</TabItem>
<TabItem value="langchain" label="Langchain">
```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
import os
os.environ["OPENAI_API_KEY"] = "anything"
chat = ChatOpenAI(
openai_api_base="http://0.0.0.0:4000",
model="gpt-3.5-turbo",
)
messages = [
SystemMessage(
content="You are a helpful assistant that im using to make a test request to."
),
HumanMessage(
content="test from litellm. tell me why it's amazing in 1 sentence"
),
]
response = chat(messages)
print(response)
```
### Usage - Call a specific model deployment
If you want to call a specific model defined in the `config.yaml`, you can call the `litellm_params: model`
</TabItem>
</Tabs>
### Test - Client Side Fallbacks
In this request the following will occur:
1. The request to `model="zephyr-beta"` will fail
2. litellm proxy will loop through all the model_groups specified in `fallbacks=["gpt-3.5-turbo"]`
3. The request to `model="gpt-3.5-turbo"` will succeed and the client making the request will get a response from gpt-3.5-turbo
👉 Key Change: `"fallbacks": ["gpt-3.5-turbo"]`
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
```python
import openai
client = openai.OpenAI(
api_key="anything",
base_url="http://0.0.0.0:4000"
)
response = client.chat.completions.create(
model="zephyr-beta",
messages = [
{
"role": "user",
"content": "this is a test request, write a short poem"
}
],
extra_body={
"fallbacks": ["gpt-3.5-turbo"]
}
)
print(response)
```
</TabItem>
<TabItem value="Curl" label="Curl Request">
Pass `fallbacks` as part of the request body
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
"model": "zephyr-beta"",
"messages": [
{
"role": "user",
"content": "what llm are you"
}
],
"fallbacks": ["gpt-3.5-turbo"]
}'
```
</TabItem>
<TabItem value="langchain" label="Langchain">
```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
import os
os.environ["OPENAI_API_KEY"] = "anything"
chat = ChatOpenAI(
openai_api_base="http://0.0.0.0:4000",
model="zephyr-beta",
extra_body={
"fallbacks": ["gpt-3.5-turbo"]
}
)
messages = [
SystemMessage(
content="You are a helpful assistant that im using to make a test request to."
),
HumanMessage(
content="test from litellm. tell me why it's amazing in 1 sentence"
),
]
response = chat(messages)
print(response)
```
</TabItem>
</Tabs>
<!--
### Test it!
In this example it will call `azure/gpt-turbo-small-ca`. Defined in the config on Step 1
```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data ' {
"model": "azure/gpt-turbo-small-ca",
"messages": [
{
"role": "user",
"content": "what llm are you"
}
],
}
'
```
--header 'Content-Type: application/json' \
--data-raw '{
"model": "zephyr-beta", # 👈 MODEL NAME to fallback from
"messages": [
{"role": "user", "content": "what color is red"}
],
"mock_testing_fallbacks": true
}'
``` -->
## Fallbacks + Retries + Timeouts + Cooldowns
## Advanced
### Fallbacks + Retries + Timeouts + Cooldowns
**Set via config**
```yaml
@ -114,44 +273,7 @@ litellm_settings:
context_window_fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}] # fallback to gpt-3.5-turbo-16k if context window error
allowed_fails: 3 # cooldown model if it fails > 1 call in a minute.
```
**Set dynamically**
```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data ' {
"model": "zephyr-beta",
"messages": [
{
"role": "user",
"content": "what llm are you"
}
],
"fallbacks": [{"zephyr-beta": ["gpt-3.5-turbo"]}],
"context_window_fallbacks": [{"zephyr-beta": ["gpt-3.5-turbo"]}],
"num_retries": 2,
"timeout": 10
}
'
```
### Test it!
```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data-raw '{
"model": "zephyr-beta", # 👈 MODEL NAME to fallback from
"messages": [
{"role": "user", "content": "what color is red"}
],
"mock_testing_fallbacks": true
}'
```
## Advanced - Context Window Fallbacks (Pre-Call Checks + Fallbacks)
### Context Window Fallbacks (Pre-Call Checks + Fallbacks)
**Before call is made** check if a call is within model context window with **`enable_pre_call_checks: true`**.
@ -287,7 +409,7 @@ print(response)
</Tabs>
## Advanced - EU-Region Filtering (Pre-Call Checks)
### EU-Region Filtering (Pre-Call Checks)
**Before call is made** check if a call is within model context window with **`enable_pre_call_checks: true`**.
@ -350,7 +472,7 @@ print(response)
print(f"response.headers.get('x-litellm-model-api-base')")
```
## Advanced - Custom Timeouts, Stream Timeouts - Per Model
### Custom Timeouts, Stream Timeouts - Per Model
For each model you can set `timeout` & `stream_timeout` under `litellm_params`
```yaml
model_list:
@ -379,7 +501,7 @@ $ litellm --config /path/to/config.yaml
```
## Advanced - Setting Dynamic Timeouts - Per Request
### Setting Dynamic Timeouts - Per Request
LiteLLM Proxy supports setting a `timeout` per request

View file

@ -77,6 +77,28 @@ litellm_settings:
#### Step 2: Setup Oauth Client
<Tabs>
<TabItem value="okta" label="Okta SSO">
1. Add Okta credentials to your .env
```bash
GENERIC_CLIENT_ID = "<your-okta-client-id>"
GENERIC_CLIENT_SECRET = "<your-okta-client-secret>"
GENERIC_AUTHORIZATION_ENDPOINT = "<your-okta-domain>/authorize" # https://dev-2kqkcd6lx6kdkuzt.us.auth0.com/authorize
GENERIC_TOKEN_ENDPOINT = "<your-okta-domain>/token" # https://dev-2kqkcd6lx6kdkuzt.us.auth0.com/oauth/token
GENERIC_USERINFO_ENDPOINT = "<your-okta-domain>/userinfo" # https://dev-2kqkcd6lx6kdkuzt.us.auth0.com/userinfo
```
You can get your domain specific auth/token/userinfo endpoints at `<YOUR-OKTA-DOMAIN>/.well-known/openid-configuration` (see the sketch at the end of this tab)
2. Add proxy url as callback_url on Okta
On Okta, add the 'callback_url' as `<proxy_base_url>/sso/callback`
<Image img={require('../../img/okta_callback_url.png')} />
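To sanity-check the endpoints from step 1, you can fetch the discovery document directly; a small sketch using `requests` (the domain is the placeholder from the comments above):
```python
import requests

# the discovery document lists the authorization/token/userinfo endpoints
okta_domain = "https://dev-2kqkcd6lx6kdkuzt.us.auth0.com"  # placeholder

config = requests.get(f"{okta_domain}/.well-known/openid-configuration").json()
print(config["authorization_endpoint"])
print(config["token_endpoint"])
print(config["userinfo_endpoint"])
```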
</TabItem>
<TabItem value="google" label="Google SSO">
- Create a new Oauth 2.0 Client on https://console.cloud.google.com/
@ -115,7 +137,6 @@ MICROSOFT_TENANT="5a39737
</TabItem>
<TabItem value="Generic" label="Generic SSO Provider">
A generic OAuth client that can be used to quickly create support for any OAuth provider with close to no code

View file

@ -63,7 +63,7 @@ You can:
- Add budgets to Teams
#### **Add budgets to users**
#### **Add budgets to teams**
```shell
curl --location 'http://localhost:4000/team/new' \
--header 'Authorization: Bearer <your-master-key>' \
@ -102,6 +102,22 @@ curl --location 'http://localhost:4000/team/new' \
"budget_reset_at": null
}
```
#### **Add budget duration to teams**
`budget_duration`: Budget is reset at the end of the specified duration. If not set, the budget is never reset. You can set the duration in seconds ("30s"), minutes ("30m"), hours ("30h"), or days ("30d").
```shell
curl 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
"team_alias": "my-new-team_4",
"members_with_roles": [{"role": "admin", "user_id": "5c4a0aa3-a1e1-43dc-bd87-3c2da8382a3a"}],
"budget_duration": 10s,
}'
```
</TabItem>
<TabItem value="per-team-member" label="For Team Members">

Binary files not shown (4 new images: 151 KiB, 279 KiB, 200 KiB, 168 KiB).

View file

@ -183,6 +183,7 @@ const sidebars = {
label: "Logging & Observability",
items: [
"debugging/local_debugging",
"observability/raw_request_response",
"observability/callbacks",
"observability/custom_callback",
"observability/langfuse_integration",
@ -256,6 +257,7 @@ const sidebars = {
"projects/GPT Migrate",
"projects/YiVal",
"projects/LiteLLM Proxy",
"projects/llm_cord",
],
},
],

View file

@ -60,6 +60,7 @@ _async_failure_callback: List[Callable] = (
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False
log_raw_request_response: bool = False
redact_messages_in_exceptions: Optional[bool] = False
store_audit_logs = False # Enterprise feature, allow users to see audit logs
## end of callbacks #############
@ -407,6 +408,7 @@ openai_compatible_providers: List = [
"together_ai",
"fireworks_ai",
"friendliai",
"azure_ai",
]
@ -611,6 +613,7 @@ provider_list: List = [
"baseten",
"azure",
"azure_text",
"azure_ai",
"sagemaker",
"bedrock",
"vllm",
@ -765,7 +768,7 @@ from .llms.gemini import GeminiConfig
from .llms.nlp_cloud import NLPCloudConfig
from .llms.aleph_alpha import AlephAlphaConfig
from .llms.petals import PetalsConfig
from .llms.vertex_ai import VertexAIConfig
from .llms.vertex_ai import VertexAIConfig, VertexAITextEmbeddingConfig
from .llms.vertex_ai_anthropic import VertexAIAnthropicConfig
from .llms.sagemaker import SagemakerConfig
from .llms.ollama import OllamaConfig
@ -787,6 +790,7 @@ from .llms.openai import (
OpenAIConfig,
OpenAITextCompletionConfig,
MistralConfig,
MistralEmbeddingConfig,
DeepInfraConfig,
)
from .llms.azure import (

View file

@ -337,8 +337,6 @@ def response_cost_calculator(
and custom_llm_provider is True
): # override defaults if custom pricing is set
base_model = model
elif base_model is None:
base_model = model
# base_model defaults to None if not set on model_info
response_cost = completion_cost(
completion_response=response_object,

View file

@ -337,6 +337,7 @@ class ContextWindowExceededError(BadRequestError): # type: ignore
model=self.model, # type: ignore
llm_provider=self.llm_provider, # type: ignore
response=response,
litellm_debug_info=self.litellm_debug_info,
) # Call the base class constructor with the parameters it needs
def __str__(self):
@ -379,6 +380,7 @@ class RejectedRequestError(BadRequestError): # type: ignore
model=self.model, # type: ignore
llm_provider=self.llm_provider, # type: ignore
response=response,
litellm_debug_info=self.litellm_debug_info,
) # Call the base class constructor with the parameters it needs
def __str__(self):
@ -418,6 +420,7 @@ class ContentPolicyViolationError(BadRequestError): # type: ignore
model=self.model, # type: ignore
llm_provider=self.llm_provider, # type: ignore
response=response,
litellm_debug_info=self.litellm_debug_info,
) # Call the base class constructor with the parameters it needs
def __str__(self):

View file

@ -6,17 +6,23 @@ import litellm
from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_logger
from litellm.types.services import ServiceLoggerPayload
from functools import wraps
from typing import Union, Optional, TYPE_CHECKING, Any
if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span
from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth
from litellm.proxy._types import (
ManagementEndpointLoggingPayload as _ManagementEndpointLoggingPayload,
)
Span = _Span
UserAPIKeyAuth = _UserAPIKeyAuth
ManagementEndpointLoggingPayload = _ManagementEndpointLoggingPayload
else:
Span = Any
UserAPIKeyAuth = Any
ManagementEndpointLoggingPayload = Any
LITELLM_TRACER_NAME = os.getenv("OTEL_TRACER_NAME", "litellm")
@ -247,7 +253,7 @@ class OpenTelemetry(CustomLogger):
span.end(end_time=self._to_ns(end_time))
def set_tools_attributes(self, span: Span, tools):
from opentelemetry.semconv.ai import SpanAttributes
from litellm.proxy._types import SpanAttributes
import json
if not tools:
@ -272,7 +278,7 @@ class OpenTelemetry(CustomLogger):
pass
def set_attributes(self, span: Span, kwargs, response_obj):
from opentelemetry.semconv.ai import SpanAttributes
from litellm.proxy._types import SpanAttributes
optional_params = kwargs.get("optional_params", {})
litellm_params = kwargs.get("litellm_params", {}) or {}
@ -407,7 +413,7 @@ class OpenTelemetry(CustomLogger):
)
def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
from opentelemetry.semconv.ai import SpanAttributes
from litellm.proxy._types import SpanAttributes
optional_params = kwargs.get("optional_params", {})
litellm_params = kwargs.get("litellm_params", {}) or {}
@ -454,6 +460,23 @@ class OpenTelemetry(CustomLogger):
def _get_span_name(self, kwargs):
return LITELLM_REQUEST_SPAN_NAME
def get_traceparent_from_header(self, headers):
if headers is None:
return None
_traceparent = headers.get("traceparent", None)
if _traceparent is None:
return None
from opentelemetry.trace.propagation.tracecontext import (
TraceContextTextMapPropagator,
)
verbose_logger.debug("OpenTelemetry: GOT A TRACEPARENT {}".format(_traceparent))
propagator = TraceContextTextMapPropagator()
_parent_context = propagator.extract(carrier={"traceparent": _traceparent})
verbose_logger.debug("OpenTelemetry: PARENT CONTEXT {}".format(_parent_context))
return _parent_context
def _get_span_context(self, kwargs):
from opentelemetry.trace.propagation.tracecontext import (
TraceContextTextMapPropagator,
@ -545,3 +568,91 @@ class OpenTelemetry(CustomLogger):
self.OTEL_EXPORTER,
)
return BatchSpanProcessor(ConsoleSpanExporter())
async def async_management_endpoint_success_hook(
self,
logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None,
):
from opentelemetry import trace
from datetime import datetime
from opentelemetry.trace import Status, StatusCode
_start_time_ns = logging_payload.start_time
_end_time_ns = logging_payload.end_time
start_time = logging_payload.start_time
end_time = logging_payload.end_time
if isinstance(start_time, float):
_start_time_ns = int(int(start_time) * 1e9)
else:
_start_time_ns = self._to_ns(start_time)
if isinstance(end_time, float):
_end_time_ns = int(int(end_time) * 1e9)
else:
_end_time_ns = self._to_ns(end_time)
if parent_otel_span is not None:
_span_name = logging_payload.route
management_endpoint_span = self.tracer.start_span(
name=_span_name,
context=trace.set_span_in_context(parent_otel_span),
start_time=_start_time_ns,
)
_request_data = logging_payload.request_data
if _request_data is not None:
for key, value in _request_data.items():
management_endpoint_span.set_attribute(f"request.{key}", value)
_response = logging_payload.response
if _response is not None:
for key, value in _response.items():
management_endpoint_span.set_attribute(f"response.{key}", value)
management_endpoint_span.set_status(Status(StatusCode.OK))
management_endpoint_span.end(end_time=_end_time_ns)
async def async_management_endpoint_failure_hook(
self,
logging_payload: ManagementEndpointLoggingPayload,
parent_otel_span: Optional[Span] = None,
):
from opentelemetry import trace
from datetime import datetime
from opentelemetry.trace import Status, StatusCode
_start_time_ns = logging_payload.start_time
_end_time_ns = logging_payload.end_time
start_time = logging_payload.start_time
end_time = logging_payload.end_time
if isinstance(start_time, float):
_start_time_ns = int(int(start_time) * 1e9)
else:
_start_time_ns = self._to_ns(start_time)
if isinstance(end_time, float):
_end_time_ns = int(int(end_time) * 1e9)
else:
_end_time_ns = self._to_ns(end_time)
if parent_otel_span is not None:
_span_name = logging_payload.route
management_endpoint_span = self.tracer.start_span(
name=_span_name,
context=trace.set_span_in_context(parent_otel_span),
start_time=_start_time_ns,
)
_request_data = logging_payload.request_data
if _request_data is not None:
for key, value in _request_data.items():
management_endpoint_span.set_attribute(f"request.{key}", value)
_exception = logging_payload.exception
management_endpoint_span.set_attribute(f"exception", str(_exception))
management_endpoint_span.set_status(Status(StatusCode.ERROR))
management_endpoint_span.end(end_time=_end_time_ns)

View file

@ -36,6 +36,9 @@ from ..types.llms.openai import (
AsyncAssistantStreamManager,
AssistantStreamManager,
)
from litellm.caching import DualCache
azure_ad_cache = DualCache()
class AzureOpenAIError(Exception):
@ -309,9 +312,10 @@ def select_azure_base_url_or_endpoint(azure_client_params: dict):
def get_azure_ad_token_from_oidc(azure_ad_token: str):
azure_client_id = os.getenv("AZURE_CLIENT_ID", None)
azure_tenant = os.getenv("AZURE_TENANT_ID", None)
azure_tenant_id = os.getenv("AZURE_TENANT_ID", None)
azure_authority_host = os.getenv("AZURE_AUTHORITY_HOST", "https://login.microsoftonline.com")
if azure_client_id is None or azure_tenant is None:
if azure_client_id is None or azure_tenant_id is None:
raise AzureOpenAIError(
status_code=422,
message="AZURE_CLIENT_ID and AZURE_TENANT_ID must be set",
@ -325,8 +329,19 @@ def get_azure_ad_token_from_oidc(azure_ad_token: str):
message="OIDC token could not be retrieved from secret manager.",
)
azure_ad_token_cache_key = json.dumps({
"azure_client_id": azure_client_id,
"azure_tenant_id": azure_tenant_id,
"azure_authority_host": azure_authority_host,
"oidc_token": oidc_token,
})
azure_ad_token_access_token = azure_ad_cache.get_cache(azure_ad_token_cache_key)
if azure_ad_token_access_token is not None:
return azure_ad_token_access_token
req_token = httpx.post(
f"https://login.microsoftonline.com/{azure_tenant}/oauth2/v2.0/token",
f"{azure_authority_host}/{azure_tenant_id}/oauth2/v2.0/token",
data={
"client_id": azure_client_id,
"grant_type": "client_credentials",
@ -342,12 +357,23 @@ def get_azure_ad_token_from_oidc(azure_ad_token: str):
message=req_token.text,
)
possible_azure_ad_token = req_token.json().get("access_token", None)
azure_ad_token_json = req_token.json()
azure_ad_token_access_token = azure_ad_token_json.get("access_token", None)
azure_ad_token_expires_in = azure_ad_token_json.get("expires_in", None)
if possible_azure_ad_token is None:
raise AzureOpenAIError(status_code=422, message="Azure AD Token not returned")
if azure_ad_token_access_token is None:
raise AzureOpenAIError(
status_code=422, message="Azure AD Token access_token not returned"
)
return possible_azure_ad_token
if azure_ad_token_expires_in is None:
raise AzureOpenAIError(
status_code=422, message="Azure AD Token expires_in not returned"
)
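# Cache the access token for its reported lifetime so subsequent calls skip the token endpoint.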
azure_ad_cache.set_cache(key=azure_ad_token_cache_key, value=azure_ad_token_access_token, ttl=azure_ad_token_expires_in)
return azure_ad_token_access_token
class AzureChatCompletion(BaseLLM):

View file

@ -51,8 +51,11 @@ from litellm.types.llms.openai import (
ChatCompletionResponseMessage,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionDeltaChunk,
)
from litellm.caching import DualCache
iam_cache = DualCache()
class AmazonCohereChatConfig:
"""
@ -324,38 +327,53 @@ class BedrockLLM(BaseLLM):
) = params_to_check
### CHECK STS ###
if (
aws_web_identity_token is not None
and aws_role_name is not None
and aws_session_name is not None
):
oidc_token = get_secret(aws_web_identity_token)
if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
iam_creds_cache_key = json.dumps({
"aws_web_identity_token": aws_web_identity_token,
"aws_role_name": aws_role_name,
"aws_session_name": aws_session_name,
"aws_region_name": aws_region_name,
})
if oidc_token is None:
raise BedrockError(
message="OIDC token could not be retrieved from secret manager.",
status_code=401,
iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
if iam_creds_dict is None:
oidc_token = get_secret(aws_web_identity_token)
if oidc_token is None:
raise BedrockError(
message="OIDC token could not be retrieved from secret manager.",
status_code=401,
)
sts_client = boto3.client(
"sts",
region_name=aws_region_name,
endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com"
)
sts_client = boto3.client("sts")
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
sts_response = sts_client.assume_role_with_web_identity(
RoleArn=aws_role_name,
RoleSessionName=aws_session_name,
WebIdentityToken=oidc_token,
DurationSeconds=3600,
)
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
sts_response = sts_client.assume_role_with_web_identity(
RoleArn=aws_role_name,
RoleSessionName=aws_session_name,
WebIdentityToken=oidc_token,
DurationSeconds=3600,
)
iam_creds_dict = {
"aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
"aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
"aws_session_token": sts_response["Credentials"]["SessionToken"],
"region_name": aws_region_name,
}
session = boto3.Session(
aws_access_key_id=sts_response["Credentials"]["AccessKeyId"],
aws_secret_access_key=sts_response["Credentials"]["SecretAccessKey"],
aws_session_token=sts_response["Credentials"]["SessionToken"],
region_name=aws_region_name,
)
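# Cache the temporary credentials for just under the 1-hour STS session so they are refreshed before expiry.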
iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)
return session.get_credentials()
session = boto3.Session(**iam_creds_dict)
iam_creds = session.get_credentials()
return iam_creds
elif aws_role_name is not None and aws_session_name is not None:
sts_client = boto3.client(
"sts",
@ -1415,38 +1433,53 @@ class BedrockConverseLLM(BaseLLM):
) = params_to_check
### CHECK STS ###
if (
aws_web_identity_token is not None
and aws_role_name is not None
and aws_session_name is not None
):
oidc_token = get_secret(aws_web_identity_token)
if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
iam_creds_cache_key = json.dumps({
"aws_web_identity_token": aws_web_identity_token,
"aws_role_name": aws_role_name,
"aws_session_name": aws_session_name,
"aws_region_name": aws_region_name,
})
if oidc_token is None:
raise BedrockError(
message="OIDC token could not be retrieved from secret manager.",
status_code=401,
iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
if iam_creds_dict is None:
oidc_token = get_secret(aws_web_identity_token)
if oidc_token is None:
raise BedrockError(
message="OIDC token could not be retrieved from secret manager.",
status_code=401,
)
sts_client = boto3.client(
"sts",
region_name=aws_region_name,
endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com"
)
sts_client = boto3.client("sts")
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
sts_response = sts_client.assume_role_with_web_identity(
RoleArn=aws_role_name,
RoleSessionName=aws_session_name,
WebIdentityToken=oidc_token,
DurationSeconds=3600,
)
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
sts_response = sts_client.assume_role_with_web_identity(
RoleArn=aws_role_name,
RoleSessionName=aws_session_name,
WebIdentityToken=oidc_token,
DurationSeconds=3600,
)
iam_creds_dict = {
"aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
"aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
"aws_session_token": sts_response["Credentials"]["SessionToken"],
"region_name": aws_region_name,
}
iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)
session = boto3.Session(**iam_creds_dict)
iam_creds = session.get_credentials()
return iam_creds
elif aws_role_name is not None and aws_session_name is not None:
sts_client = boto3.client(
"sts",
@ -1859,29 +1892,59 @@ class AWSEventStreamDecoder:
self.parser = EventStreamJSONParser()
def converse_chunk_parser(self, chunk_data: dict) -> GenericStreamingChunk:
text = ""
tool_str = ""
is_finished = False
finish_reason = ""
usage: Optional[ConverseTokenUsageBlock] = None
if "delta" in chunk_data:
delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
if "text" in delta_obj:
text = delta_obj["text"]
elif "toolUse" in delta_obj:
tool_str = delta_obj["toolUse"]["input"]
elif "stopReason" in chunk_data:
finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
elif "usage" in chunk_data:
usage = ConverseTokenUsageBlock(**chunk_data["usage"]) # type: ignore
response = GenericStreamingChunk(
text=text,
tool_str=tool_str,
is_finished=is_finished,
finish_reason=finish_reason,
usage=usage,
)
return response
try:
text = ""
tool_use: Optional[ChatCompletionToolCallChunk] = None
is_finished = False
finish_reason = ""
usage: Optional[ConverseTokenUsageBlock] = None
index = int(chunk_data.get("contentBlockIndex", 0))
if "start" in chunk_data:
start_obj = ContentBlockStartEvent(**chunk_data["start"])
if (
start_obj is not None
and "toolUse" in start_obj
and start_obj["toolUse"] is not None
):
tool_use = {
"id": start_obj["toolUse"]["toolUseId"],
"type": "function",
"function": {
"name": start_obj["toolUse"]["name"],
"arguments": "",
},
}
elif "delta" in chunk_data:
delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
if "text" in delta_obj:
text = delta_obj["text"]
elif "toolUse" in delta_obj:
tool_use = {
"id": None,
"type": "function",
"function": {
"name": None,
"arguments": delta_obj["toolUse"]["input"],
},
}
elif "stopReason" in chunk_data:
finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
is_finished = True
elif "usage" in chunk_data:
usage = ConverseTokenUsageBlock(**chunk_data["usage"]) # type: ignore
response = GenericStreamingChunk(
text=text,
tool_use=tool_use,
is_finished=is_finished,
finish_reason=finish_reason,
usage=usage,
index=index,
)
return response
except Exception as e:
raise Exception("Received streaming error - {}".format(str(e)))
def _chunk_parser(self, chunk_data: dict) -> GenericStreamingChunk:
text = ""
@ -1890,12 +1953,16 @@ class AWSEventStreamDecoder:
if "outputText" in chunk_data:
text = chunk_data["outputText"]
# ai21 mapping
if "ai21" in self.model: # fake ai21 streaming
elif "ai21" in self.model: # fake ai21 streaming
text = chunk_data.get("completions")[0].get("data").get("text") # type: ignore
is_finished = True
finish_reason = "stop"
######## bedrock.anthropic mappings ###############
elif "delta" in chunk_data:
elif (
"contentBlockIndex" in chunk_data
or "stopReason" in chunk_data
or "metrics" in chunk_data
):
return self.converse_chunk_parser(chunk_data=chunk_data)
######## bedrock.mistral mappings ###############
elif "outputs" in chunk_data:
@ -1905,7 +1972,7 @@ class AWSEventStreamDecoder:
):
text = chunk_data["outputs"][0]["text"]
stop_reason = chunk_data.get("stop_reason", None)
if stop_reason is not None:
is_finished = True
finish_reason = stop_reason
######## bedrock.cohere mappings ###############
@ -1926,8 +1993,9 @@ class AWSEventStreamDecoder:
text=text,
is_finished=is_finished,
finish_reason=finish_reason,
tool_str="",
usage=None,
index=0,
tool_use=None,
)
def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[GenericStreamingChunk]:

View file

@ -139,6 +139,7 @@ def process_response(
def convert_model_to_url(model: str, api_base: str):
user_id, app_id, model_id = model.split(".")
model_id = model_id.lower()
return f"{api_base}/users/{user_id}/apps/{app_id}/models/{model_id}/outputs"
@ -171,19 +172,55 @@ async def async_completion(
async_handler = AsyncHTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
response = await async_handler.post(
url=model, headers=headers, data=json.dumps(data)
)
logging_obj.post_call(
input=prompt,
api_key=api_key,
original_response=response.text,
additional_args={"complete_input_dict": data},
)
## RESPONSE OBJECT
try:
completion_response = response.json()
except Exception:
raise ClarifaiError(
message=response.text, status_code=response.status_code, url=model
)
# print(completion_response)
try:
choices_list = []
for idx, item in enumerate(completion_response["outputs"]):
if len(item["data"]["text"]["raw"]) > 0:
message_obj = Message(content=item["data"]["text"]["raw"])
else:
message_obj = Message(content=None)
choice_obj = Choices(
finish_reason="stop",
index=idx + 1, # check
message=message_obj,
)
choices_list.append(choice_obj)
model_response["choices"] = choices_list
except Exception as e:
raise ClarifaiError(
message=traceback.format_exc(), status_code=response.status_code, url=model
)
# Calculate Usage
prompt_tokens = len(encoding.encode(prompt))
completion_tokens = len(
encoding.encode(model_response["choices"][0]["message"].get("content"))
)
model_response["model"] = model
model_response["usage"] = Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
)
return model_response
def completion(
@ -241,7 +278,7 @@ def completion(
additional_args={
"complete_input_dict": data,
"headers": headers,
"api_base": api_base,
"api_base": model,
},
)
if acompletion == True:

View file

@ -164,6 +164,49 @@ class MistralConfig:
return optional_params
class MistralEmbeddingConfig:
"""
Reference: https://docs.mistral.ai/api/#operation/createEmbedding
"""
def __init__(
self,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self):
return [
"encoding_format",
]
def map_openai_params(self, non_default_params: dict, optional_params: dict):
for param, value in non_default_params.items():
if param == "encoding_format":
optional_params["encoding_format"] = value
return optional_params
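A usage sketch for the new config; the import path is an assumption (the class is defined alongside MistralConfig in this module).

```python
# Assumed import path - MistralEmbeddingConfig lives next to MistralConfig.
from litellm.llms.openai import MistralEmbeddingConfig

config = MistralEmbeddingConfig()
assert config.get_supported_openai_params() == ["encoding_format"]

# The only supported OpenAI param is passed through unchanged.
optional_params = config.map_openai_params(
    non_default_params={"encoding_format": "float"},
    optional_params={},
)
print(optional_params)  # {'encoding_format': 'float'}
```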
class DeepInfraConfig:
"""
Reference: https://deepinfra.com/docs/advanced/openai_api

File diff suppressed because it is too large

View file

@ -4,6 +4,7 @@ from enum import Enum
import requests # type: ignore
import time
from typing import Callable, Optional, Union, List, Literal, Any
from pydantic import BaseModel
from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
import litellm, uuid
import httpx, inspect # type: ignore
@ -12,7 +13,12 @@ from litellm.llms.prompt_templates.factory import (
convert_to_gemini_tool_call_result,
convert_to_gemini_tool_call_invoke,
)
from litellm.types.files import (
get_file_mime_type_for_file_type,
get_file_type_from_extension,
is_gemini_1_5_accepted_file_type,
is_video_file_type,
)
class VertexAIError(Exception):
@ -301,15 +307,15 @@ def _process_gemini_image(image_url: str) -> PartType:
# GCS URIs
if "gs://" in image_url:
# Figure out file type
extension_with_dot = os.path.splitext(image_url)[-1] # Ex: ".png"
extension = extension_with_dot[1:] # Ex: "png"
file_type = get_file_type_from_extension(extension)
# Validate the file type is supported by Gemini
if not is_gemini_1_5_accepted_file_type(file_type):
raise Exception(f"File type not supported by gemini - {file_type}")
mime_type = get_file_mime_type_for_file_type(file_type)
file_data = FileDataType(mime_type=mime_type, file_uri=image_url)
@ -320,7 +326,7 @@ def _process_gemini_image(image_url: str) -> PartType:
image = _load_image_from_url(image_url)
_blob = BlobType(data=image.data, mime_type=image._mime_type)
return PartType(inline_data=_blob)
# Base64 encoding
elif "base64" in image_url:
import base64, re
@ -1293,6 +1299,95 @@ async def async_streaming(
return streamwrapper
class VertexAITextEmbeddingConfig(BaseModel):
"""
Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#TextEmbeddingInput
Args:
auto_truncate: Optional(bool) If True, will truncate input text to fit within the model's max input length.
task_type: Optional(str) The type of task to be performed. The default is "RETRIEVAL_QUERY".
title: Optional(str) The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
"""
auto_truncate: Optional[bool] = None
task_type: Optional[
Literal[
"RETRIEVAL_QUERY",
"RETRIEVAL_DOCUMENT",
"SEMANTIC_SIMILARITY",
"CLASSIFICATION",
"CLUSTERING",
"QUESTION_ANSWERING",
"FACT_VERIFICATION",
]
] = None
title: Optional[str] = None
def __init__(
self,
auto_truncate: Optional[bool] = None,
task_type: Optional[
Literal[
"RETRIEVAL_QUERY",
"RETRIEVAL_DOCUMENT",
"SEMANTIC_SIMILARITY",
"CLASSIFICATION",
"CLUSTERING",
"QUESTION_ANSWERING",
"FACT_VERIFICATION",
]
] = None,
title: Optional[str] = None,
) -> None:
locals_ = locals()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self):
return [
"dimensions",
]
def map_openai_params(self, non_default_params: dict, optional_params: dict):
for param, value in non_default_params.items():
if param == "dimensions":
optional_params["output_dimensionality"] = value
return optional_params
def get_mapped_special_auth_params(self) -> dict:
"""
Common auth params across bedrock/vertex_ai/azure/watsonx
"""
return {"project": "vertex_project", "region_name": "vertex_location"}
def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
mapped_params = self.get_mapped_special_auth_params()
for param, value in non_default_params.items():
if param in mapped_params:
optional_params[mapped_params[param]] = value
return optional_params
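With the `embedding()` changes further down, `task_type`/`title` flow end to end; a hedged example (the model string is an assumption):

```python
import litellm

# Hedged sketch: `task_type` is one of the Literal values accepted above and,
# per the embedding() change below, each input is cast to a TextEmbeddingInput.
response = litellm.embedding(
    model="vertex_ai/text-embedding-004",  # model string assumed
    input=["why does LiteLLM map task_type?"],
    task_type="RETRIEVAL_QUERY",
)
```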
def embedding(
model: str,
input: Union[list, str],
@ -1316,7 +1411,7 @@ def embedding(
message="vertexai import failed please run `pip install google-cloud-aiplatform`",
)
from vertexai.language_models import TextEmbeddingModel, TextEmbeddingInput
import google.auth # type: ignore
## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
@ -1347,6 +1442,16 @@ def embedding(
if isinstance(input, str):
input = [input]
if optional_params is not None and isinstance(optional_params, dict):
if optional_params.get("task_type") or optional_params.get("title"):
# if user passed task_type or title, cast to TextEmbeddingInput
_task_type = optional_params.pop("task_type", None)
_title = optional_params.pop("title", None)
input = [
TextEmbeddingInput(text=x, task_type=_task_type, title=_title)
for x in input
]
try:
llm_model = TextEmbeddingModel.from_pretrained(model)
except Exception as e:
@ -1363,7 +1468,8 @@ def embedding(
encoding=encoding,
)
request_str = f"""embeddings = llm_model.get_embeddings({input})"""
_input_dict = {"texts": input, **optional_params}
request_str = f"""embeddings = llm_model.get_embeddings({_input_dict})"""
## LOGGING PRE-CALL
logging_obj.pre_call(
input=input,
@ -1375,7 +1481,7 @@ def embedding(
)
try:
embeddings = llm_model.get_embeddings(**_input_dict)
except Exception as e:
raise VertexAIError(status_code=500, message=str(e))
@ -1383,6 +1489,7 @@ def embedding(
logging_obj.post_call(input=input, api_key=None, original_response=embeddings)
## Populate OpenAI compliant dictionary
embedding_response = []
input_tokens: int = 0
for idx, embedding in enumerate(embeddings):
embedding_response.append(
{
@ -1391,14 +1498,10 @@ def embedding(
"embedding": embedding.values,
}
)
input_tokens += embedding.statistics.token_count
model_response["object"] = "list"
model_response["data"] = embedding_response
model_response["model"] = model
usage = Usage(
prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
@ -1420,7 +1523,8 @@ async def async_embedding(
"""
Async embedding implementation
"""
request_str = f"""embeddings = llm_model.get_embeddings({input})"""
_input_dict = {"texts": input, **optional_params}
request_str = f"""embeddings = llm_model.get_embeddings({_input_dict})"""
## LOGGING PRE-CALL
logging_obj.pre_call(
input=input,
@ -1432,7 +1536,7 @@ async def async_embedding(
)
try:
embeddings = await client.get_embeddings_async(**_input_dict)
except Exception as e:
raise VertexAIError(status_code=500, message=str(e))
@ -1440,6 +1544,7 @@ async def async_embedding(
logging_obj.post_call(input=input, api_key=None, original_response=embeddings)
## Populate OpenAI compliant dictionary
embedding_response = []
input_tokens: int = 0
for idx, embedding in enumerate(embeddings):
embedding_response.append(
{
@ -1448,18 +1553,13 @@ async def async_embedding(
"embedding": embedding.values,
}
)
input_tokens += embedding.statistics.token_count
model_response["object"] = "list"
model_response["data"] = embedding_response
model_response["model"] = model
usage = Usage(
prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
)
model_response.usage = usage
return model_response

View file

@ -11,10 +11,10 @@ import os, openai, sys, json, inspect, uuid, datetime, threading
from typing import Any, Literal, Union, BinaryIO
from typing_extensions import overload
from functools import partial
import dotenv, traceback, random, asyncio, time, contextvars
from copy import deepcopy
import httpx
import litellm
from ._logging import verbose_logger
from litellm import ( # type: ignore
@ -335,6 +335,7 @@ async def acompletion(
or custom_llm_provider == "predibase"
or custom_llm_provider == "bedrock"
or custom_llm_provider == "databricks"
or custom_llm_provider == "clarifai"
or custom_llm_provider in litellm.openai_compatible_providers
): # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all.
init_response = await loop.run_in_executor(None, func_with_context)

View file

@ -1387,6 +1387,26 @@
"mode": "image_generation",
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"text-embedding-004": {
"max_tokens": 3072,
"max_input_tokens": 3072,
"output_vector_size": 768,
"input_cost_per_token": 0.00000000625,
"output_cost_per_token": 0,
"litellm_provider": "vertex_ai-embedding-models",
"mode": "embedding",
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
},
"text-multilingual-embedding-002": {
"max_tokens": 2048,
"max_input_tokens": 2048,
"output_vector_size": 768,
"input_cost_per_token": 0.00000000625,
"output_cost_per_token": 0,
"litellm_provider": "vertex_ai-embedding-models",
"mode": "embedding",
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
},
"textembedding-gecko": {
"max_tokens": 3072,
"max_input_tokens": 3072,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[45980,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-17b0c91edd3a24fe.js\",\"931\",\"static/chunks/app/page-d61796ff0d3a8faf.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"tghLG7_IS7i5OkQJRvCIl\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[45980,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-17b0c91edd3a24fe.js\",\"931\",\"static/chunks/app/page-bd882aee817406ff.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"48nWsJi-LJrUlOLzcK-Yz\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[45980,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-17b0c91edd3a24fe.js","931","static/chunks/app/page-d61796ff0d3a8faf.js"],""]
3:I[45980,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-17b0c91edd3a24fe.js","931","static/chunks/app/page-bd882aee817406ff.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -2,6 +2,6 @@
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-17b0c91edd3a24fe.js","418","static/chunks/app/model_hub/page-4cb65c32467214b5.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -2,6 +2,6 @@
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-17b0c91edd3a24fe.js","461","static/chunks/app/onboarding/page-664c7288e11fff5a.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -1,7 +1,12 @@
import json
import logging
from logging import Formatter
import sys
import os
from litellm import json_logs
# Set default log level to INFO
log_level = os.getenv("LITELLM_LOG", "INFO")
numeric_level: int = getattr(logging, log_level.upper())
class JsonFormatter(Formatter):
@ -16,6 +21,14 @@ class JsonFormatter(Formatter):
logger = logging.root
handler = logging.StreamHandler()
if json_logs:
handler.setFormatter(JsonFormatter())
else:
formatter = logging.Formatter(
"\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s",
datefmt="%H:%M:%S",
)
handler.setFormatter(formatter)
logger.handlers = [handler]
logger.setLevel(numeric_level)
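The resulting knobs, as a sketch (both names are taken from the diff; note that `json_logs` and `LITELLM_LOG` are resolved once, at import time):

```python
import os

# LITELLM_LOG must be set before litellm is imported, since the root log
# level is resolved at import time; the same goes for the json_logs formatter.
os.environ["LITELLM_LOG"] = "DEBUG"

import litellm  # noqa: E402 - imported after the env var so the level applies
```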

View file

@ -719,6 +719,8 @@ class Member(LiteLLMBase):
@model_validator(mode="before")
@classmethod
def check_user_info(cls, values):
if not isinstance(values, dict):
raise ValueError("input needs to be a dictionary")
if values.get("user_id") is None and values.get("user_email") is None:
raise ValueError("Either user id or user email must be provided")
return values
@ -757,9 +759,24 @@ class GlobalEndUsersSpend(LiteLLMBase):
class TeamMemberAddRequest(LiteLLMBase):
team_id: str
member: Union[List[Member], Member]
max_budget_in_team: Optional[float] = None # Users max budget within the team
def __init__(self, **data):
member_data = data.get("member")
if isinstance(member_data, list):
# If member is a list of dictionaries, convert each dictionary to a Member object
members = [Member(**item) for item in member_data]
# Replace member_data with the list of Member objects
data["member"] = members
elif isinstance(member_data, dict):
# If member is a dictionary, convert it to a single Member object
member = Member(**member_data)
# Replace member_data with the single Member object
data["member"] = member
# Call the superclass __init__ method to initialize the object
super().__init__(**data)
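Both payload shapes now validate; a sketch with illustrative member fields (Member also carries a role alongside user_id/user_email):

```python
# Single-member payload, as before.
single = TeamMemberAddRequest(
    team_id="team-1",
    member={"role": "user", "user_email": "alice@example.com"},
)

# New: a list of members in one request.
bulk = TeamMemberAddRequest(
    team_id="team-1",
    member=[
        {"role": "user", "user_email": "alice@example.com"},
        {"role": "admin", "user_id": "user-123"},
    ],
)
```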
class TeamMemberDeleteRequest(LiteLLMBase):
team_id: str
@ -1472,6 +1489,9 @@ class SpendLogsMetadata(TypedDict):
user_api_key_team_id: Optional[str]
user_api_key_user_id: Optional[str]
user_api_key_team_alias: Optional[str]
spend_logs_metadata: Optional[
dict
] # special param to log k,v pairs to spendlogs for a call
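A hedged caller-side sketch for the new field (assumes the SDK/proxy forwards `metadata` into the spend-logs payload):

```python
import litellm

# `spend_logs_metadata` rides along under `metadata` and is written to the
# SpendLogs row for this call (assumption: metadata passthrough is enabled).
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    metadata={"spend_logs_metadata": {"request_source": "batch-job-42"}},
)
```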
class SpendLogsPayload(TypedDict):
@ -1496,3 +1516,60 @@ class SpendLogsPayload(TypedDict):
request_tags: str # json str
team_id: Optional[str]
end_user: Optional[str]
class SpanAttributes(str, enum.Enum):
# Note: We've taken this from opentelemetry-semantic-conventions-ai
# I chose not to add a new dependency to litellm for this
# Semantic Conventions for LLM requests, this needs to be removed after
# OpenTelemetry Semantic Conventions support Gen AI.
# Issue at https://github.com/open-telemetry/opentelemetry-python/issues/3868
# Refer to https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/llm-spans.md
LLM_SYSTEM = "gen_ai.system"
LLM_REQUEST_MODEL = "gen_ai.request.model"
LLM_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
LLM_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
LLM_REQUEST_TOP_P = "gen_ai.request.top_p"
LLM_PROMPTS = "gen_ai.prompt"
LLM_COMPLETIONS = "gen_ai.completion"
LLM_RESPONSE_MODEL = "gen_ai.response.model"
LLM_USAGE_COMPLETION_TOKENS = "gen_ai.usage.completion_tokens"
LLM_USAGE_PROMPT_TOKENS = "gen_ai.usage.prompt_tokens"
LLM_TOKEN_TYPE = "gen_ai.token.type"
# To be added
# LLM_RESPONSE_FINISH_REASON = "gen_ai.response.finish_reasons"
# LLM_RESPONSE_ID = "gen_ai.response.id"
# LLM
LLM_REQUEST_TYPE = "llm.request.type"
LLM_USAGE_TOTAL_TOKENS = "llm.usage.total_tokens"
LLM_USAGE_TOKEN_TYPE = "llm.usage.token_type"
LLM_USER = "llm.user"
LLM_HEADERS = "llm.headers"
LLM_TOP_K = "llm.top_k"
LLM_IS_STREAMING = "llm.is_streaming"
LLM_FREQUENCY_PENALTY = "llm.frequency_penalty"
LLM_PRESENCE_PENALTY = "llm.presence_penalty"
LLM_CHAT_STOP_SEQUENCES = "llm.chat.stop_sequences"
LLM_REQUEST_FUNCTIONS = "llm.request.functions"
LLM_REQUEST_REPETITION_PENALTY = "llm.request.repetition_penalty"
LLM_RESPONSE_FINISH_REASON = "llm.response.finish_reason"
LLM_RESPONSE_STOP_REASON = "llm.response.stop_reason"
LLM_CONTENT_COMPLETION_CHUNK = "llm.content.completion.chunk"
# OpenAI
LLM_OPENAI_RESPONSE_SYSTEM_FINGERPRINT = "gen_ai.openai.system_fingerprint"
LLM_OPENAI_API_BASE = "gen_ai.openai.api_base"
LLM_OPENAI_API_VERSION = "gen_ai.openai.api_version"
LLM_OPENAI_API_TYPE = "gen_ai.openai.api_type"
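Illustrative only: how these semantic-convention names attach to an OTEL span (assumes the opentelemetry SDK is installed):

```python
from opentelemetry import trace

tracer = trace.get_tracer(__name__)
with tracer.start_as_current_span("litellm_request") as span:
    # SpanAttributes is a str enum, so .value yields the attribute key.
    span.set_attribute(SpanAttributes.LLM_SYSTEM.value, "openai")
    span.set_attribute(SpanAttributes.LLM_REQUEST_MODEL.value, "gpt-3.5-turbo")
    span.set_attribute(SpanAttributes.LLM_USAGE_TOTAL_TOKENS.value, 42)
```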
class ManagementEndpointLoggingPayload(LiteLLMBase):
route: str
request_data: dict
response: Optional[dict] = None
exception: Optional[Any] = None
start_time: Optional[datetime] = None
end_time: Optional[datetime] = None

View file

@ -151,8 +151,8 @@ def common_checks(
and route != "/models"
):
if global_proxy_spend > litellm.max_budget:
raise litellm.BudgetExceededError(
current_cost=global_proxy_spend, max_budget=litellm.max_budget
)
return True

View file

@ -0,0 +1,31 @@
from typing import Optional
from fastapi import Request
import ast, json
async def _read_request_body(request: Optional[Request]) -> dict:
"""
Asynchronous function to read the request body and parse it as JSON or literal data.
Parameters:
- request: The request object to read the body from
Returns:
- dict: Parsed request data as a dictionary
"""
try:
request_data: dict = {}
if request is None:
return request_data
body = await request.body()
if body == b"" or body is None:
return request_data
body_str = body.decode()
try:
request_data = ast.literal_eval(body_str)
except:
request_data = json.loads(body_str)
return request_data
except:
return {}
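A usage sketch inside a FastAPI route (route and handler names are illustrative):

```python
from fastapi import FastAPI, Request

app = FastAPI()

@app.post("/echo")
async def echo(request: Request):
    # Falls back to {} on empty or unparsable bodies, per the helper above.
    data = await _read_request_body(request=request)
    return {"received": data}
```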

View file

@ -0,0 +1,90 @@
from datetime import datetime
from functools import wraps
from litellm.proxy._types import UserAPIKeyAuth, ManagementEndpointLoggingPayload
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from fastapi import Request
def management_endpoint_wrapper(func):
"""
This wrapper does the following:
1. Log I/O, Exceptions to OTEL
2. Create an Audit log for success calls
"""
@wraps(func)
async def wrapper(*args, **kwargs):
start_time = datetime.now()
try:
result = await func(*args, **kwargs)
end_time = datetime.now()
if kwargs is None:
kwargs = {}
user_api_key_dict: UserAPIKeyAuth = (
kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
)
parent_otel_span = user_api_key_dict.parent_otel_span
if parent_otel_span is not None:
from litellm.proxy.proxy_server import open_telemetry_logger
if open_telemetry_logger is not None:
_http_request: Request = kwargs.get("http_request")
_route = _http_request.url.path
_request_body: dict = await _read_request_body(
request=_http_request
)
_response = dict(result) if result is not None else None
logging_payload = ManagementEndpointLoggingPayload(
route=_route,
request_data=_request_body,
response=_response,
start_time=start_time,
end_time=end_time,
)
await open_telemetry_logger.async_management_endpoint_success_hook(
logging_payload=logging_payload,
parent_otel_span=parent_otel_span,
)
return result
except Exception as e:
end_time = datetime.now()
if kwargs is None:
kwargs = {}
user_api_key_dict: UserAPIKeyAuth = (
kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
)
parent_otel_span = user_api_key_dict.parent_otel_span
if parent_otel_span is not None:
from litellm.proxy.proxy_server import open_telemetry_logger
if open_telemetry_logger is not None:
_http_request: Request = kwargs.get("http_request")
_route = _http_request.url.path
_request_body: dict = await _read_request_body(
request=_http_request
)
logging_payload = ManagementEndpointLoggingPayload(
route=_route,
request_data=_request_body,
response=None,
start_time=start_time,
end_time=end_time,
exception=e,
)
await open_telemetry_logger.async_management_endpoint_failure_hook(
logging_payload=logging_payload,
parent_otel_span=parent_otel_span,
)
raise e
return wrapper
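A hedged sketch of how a management route opts in. The wrapper looks up `http_request` and `user_api_key_dict` in kwargs, so both must be keyword parameters; the route path and auth dependency here are illustrative, not from the diff:

```python
from fastapi import APIRouter, Depends, Request

router = APIRouter()

@router.post("/team/member_add")
@management_endpoint_wrapper  # logs request/response + exceptions to OTEL
async def team_member_add(
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),  # assumed dependency
):
    return {"status": "success"}
```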

View file

@ -79,10 +79,6 @@ async def add_litellm_data_to_request(
data["cache"][k] = v
verbose_proxy_logger.debug("receiving data: %s", data)
if "metadata" not in data:
data["metadata"] = {}

View file

@ -0,0 +1,63 @@
# What is this?
## Helper utils for the management endpoints (keys/users/teams)
from litellm.proxy._types import LiteLLM_TeamTable, Member, UserAPIKeyAuth
from litellm.proxy.utils import PrismaClient
import uuid
from typing import Optional
async def add_new_member(
new_member: Member,
max_budget_in_team: Optional[float],
prisma_client: PrismaClient,
team_id: str,
user_api_key_dict: UserAPIKeyAuth,
litellm_proxy_admin_name: str,
):
"""
Add a new member to a team
- add team id to user table
- add team member w/ budget to team member table
"""
## ADD TEAM ID, to USER TABLE IF NEW ##
if new_member.user_id is not None:
await prisma_client.db.litellm_usertable.update(
where={"user_id": new_member.user_id},
data={"teams": {"push": [team_id]}},
)
elif new_member.user_email is not None:
user_data = {"user_id": str(uuid.uuid4()), "user_email": new_member.user_email}
## user email is not unique acc. to prisma schema -> future improvement
### for now: check if it exists in db, if not - insert it
existing_user_row = await prisma_client.get_data(
key_val={"user_email": new_member.user_email},
table_name="user",
query_type="find_all",
)
if existing_user_row is None or (
isinstance(existing_user_row, list) and len(existing_user_row) == 0
):
await prisma_client.insert_data(data=user_data, table_name="user")
# Check if trying to set a budget for team member
if max_budget_in_team is not None and new_member.user_id is not None:
# create a new budget item for this member
response = await prisma_client.db.litellm_budgettable.create(
data={
"max_budget": max_budget_in_team,
"created_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
"updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
}
)
_budget_id = response.budget_id
await prisma_client.db.litellm_teammembership.create(
data={
"team_id": team_id,
"user_id": new_member.user_id,
"budget_id": _budget_id,
}
)

View file

@ -14,10 +14,9 @@ model_list:
litellm_params:
model: openai/*
api_key: os.environ/OPENAI_API_KEY
- model_name: mistral-embed
litellm_params:
model: triton/any"
api_base: https://exampleopenaiendpoint-production.up.railway.app/triton/embeddings
model: mistral/mistral-embed
general_settings:
master_key: sk-1234
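A client-side check for this config, as a sketch (proxy assumed on localhost:4000, authenticated with the master key above):

```python
import openai

# Point the standard OpenAI client at the LiteLLM proxy.
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
response = client.embeddings.create(
    model="mistral-embed",
    input=["hello from litellm"],
)
print(len(response.data[0].embedding))
```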

View file

@ -90,6 +90,7 @@ from litellm.types.llms.openai import (
HttpxBinaryResponseContent,
)
from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
from litellm.proxy.management_helpers.utils import add_new_member
from litellm.proxy.utils import (
PrismaClient,
DBClient,
@ -102,7 +103,6 @@ from litellm.proxy.utils import (
hash_token,
html_form,
missing_keys_html_form,
_is_valid_team_configs,
_is_user_proxy_admin,
_get_user_role,
@ -114,6 +114,8 @@ from litellm.proxy.utils import (
_to_ns,
get_error_message_str,
)
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm import (
CreateBatchRequest,
RetrieveBatchRequest,
@ -160,6 +162,10 @@ from litellm.proxy.auth.auth_checks import (
get_user_object,
allowed_routes_check,
get_actual_routes,
log_to_opentelemetry,
)
from litellm.proxy.common_utils.management_endpoint_utils import (
management_endpoint_wrapper,
)
from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
from litellm.exceptions import RejectedRequestError
@ -368,6 +374,11 @@ from typing import Dict
api_key_header = APIKeyHeader(
name="Authorization", auto_error=False, description="Bearer token"
)
azure_api_key_header = APIKeyHeader(
name="API-Key",
auto_error=False,
description="Some older versions of the openai Python package will send an API-Key header with just the API key ",
)
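An illustrative request exercising this fallback, where the key arrives in an `API-Key` header rather than `Authorization: Bearer ...`:

```python
import httpx

resp = httpx.post(
    "http://0.0.0.0:4000/chat/completions",
    headers={"API-Key": "sk-1234"},  # what older openai clients send
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "hi"}],
    },
)
print(resp.status_code)
```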
user_api_base = None
user_model = None
user_debug = False
@ -508,18 +519,27 @@ async def check_request_disconnection(request: Request, llm_api_call_task):
async def user_api_key_auth(
request: Request,
api_key: str = fastapi.Security(api_key_header),
azure_api_key_header: str = fastapi.Security(azure_api_key_header),
) -> UserAPIKeyAuth:
global master_key, prisma_client, llm_model_list, user_custom_auth, custom_db_client, general_settings, proxy_logging_obj
try:
if isinstance(api_key, str):
passed_in_key = api_key
api_key = _get_bearer_token(api_key=api_key)
elif isinstance(azure_api_key_header, str):
api_key = azure_api_key_header
parent_otel_span: Optional[Span] = None
if open_telemetry_logger is not None:
parent_otel_span = open_telemetry_logger.tracer.start_span(
name="Received Proxy Server Request",
start_time=_to_ns(datetime.now()),
context=open_telemetry_logger.get_traceparent_from_header(
headers=request.headers
),
)
### USER-DEFINED AUTH FUNCTION ###
if user_custom_auth is not None:
@ -1062,8 +1082,9 @@ async def user_api_key_auth(
_user_id = _user.get("user_id", None)
if user_current_spend > user_max_budget:
raise litellm.BudgetExceededError(
current_cost=user_current_spend,
max_budget=user_max_budget,
)
else:
# Token exists, not expired now check if its in budget for the user
@ -1094,9 +1115,11 @@ async def user_api_key_auth(
)
if user_current_spend > user_max_budget:
raise litellm.BudgetExceededError(
current_cost=user_current_spend,
max_budget=user_max_budget,
)
# Check 3. Check if user is in their team budget
if valid_token.team_member_spend is not None:
if prisma_client is not None:
@ -1130,8 +1153,9 @@ async def user_api_key_auth(
)
if team_member_budget is not None and team_member_budget > 0:
if valid_token.team_member_spend > team_member_budget:
raise litellm.BudgetExceededError(
current_cost=valid_token.team_member_spend,
max_budget=team_member_budget,
)
# Check 3. If token is expired
@ -1189,8 +1213,9 @@ async def user_api_key_auth(
####################################
if valid_token.spend >= valid_token.max_budget:
raise Exception(
f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}"
raise litellm.BudgetExceededError(
current_cost=valid_token.spend,
max_budget=valid_token.max_budget,
)
# Check 5. Token Model Spend is under Model budget
@ -1226,8 +1251,9 @@ async def user_api_key_auth(
):
current_model_spend = model_spend[0]["_sum"]["spend"]
current_model_budget = max_budget_per_model[current_model]
raise Exception(
f"ExceededModelBudget: Current spend for model: {current_model_spend}; Max Budget for Model: {current_model_budget}"
raise litellm.BudgetExceededError(
current_cost=current_model_spend,
max_budget=current_model_budget,
)
# Check 6. Team spend is under Team budget
@ -1251,8 +1277,9 @@ async def user_api_key_auth(
)
if valid_token.team_spend >= valid_token.team_max_budget:
raise Exception(
f"ExceededTokenBudget: Current Team Spend: {valid_token.team_spend}; Max Budget for Team: {valid_token.team_max_budget}"
raise litellm.BudgetExceededError(
current_cost=valid_token.team_spend,
max_budget=valid_token.team_max_budget,
)
# Check 8: Additional Common Checks across jwt + key auth
@ -1495,7 +1522,7 @@ async def user_api_key_auth(
)
if valid_token is None:
# No token was found when looking up in the DB
raise Exception("Invalid token passed")
raise Exception("Invalid proxy server token passed")
if valid_token_dict is not None:
if user_id_information is not None and _is_user_proxy_admin(
user_id_information
@ -1528,6 +1555,14 @@ async def user_api_key_auth(
str(e)
)
)
# Log this exception to OTEL
if open_telemetry_logger is not None:
await open_telemetry_logger.async_post_call_failure_hook(
original_exception=e,
user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span),
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, litellm.BudgetExceededError):
raise ProxyException(
@ -7803,6 +7838,10 @@ async def get_global_spend_report(
default=None,
description="Time till which to view spend",
),
group_by: Optional[Literal["team", "customer"]] = fastapi.Query(
default="team",
description="Group spend by internal team or customer",
),
):
"""
Get daily spend per team (or per customer, when group_by="customer") based on a specific startTime and endTime. Per group, view usage by each key and model
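A hedged usage sketch for the new `group_by` parameter (route path and query-parameter names are assumptions based on this function's signature and the proxy's conventions):

```python
# Sketch: pull the spend report grouped by customer instead of team.
import httpx

resp = httpx.get(
    "http://0.0.0.0:4000/global/spend/report",
    params={
        "start_date": "2024-06-01",  # assumed param name
        "end_date": "2024-06-30",    # assumed param name
        "group_by": "customer",      # new in this diff; defaults to "team"
    },
    headers={"Authorization": "Bearer sk-1234"},
)
print(resp.json())
```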
@ -7849,69 +7888,130 @@ async def get_global_spend_report(
f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
)
        if group_by == "team":
            # first get data from spend logs -> SpendByModelApiKey
            # then read data from "SpendByModelApiKey" to format the response obj
            sql_query = """
            WITH SpendByModelApiKey AS (
                SELECT
                    date_trunc('day', sl."startTime") AS group_by_day,
                    COALESCE(tt.team_alias, 'Unassigned Team') AS team_name,
                    sl.model,
                    sl.api_key,
                    SUM(sl.spend) AS model_api_spend,
                    SUM(sl.total_tokens) AS model_api_tokens
                FROM
                    "LiteLLM_SpendLogs" sl
                LEFT JOIN
                    "LiteLLM_TeamTable" tt
                ON
                    sl.team_id = tt.team_id
                WHERE
                    sl."startTime" BETWEEN $1::date AND $2::date
                GROUP BY
                    date_trunc('day', sl."startTime"),
                    tt.team_alias,
                    sl.model,
                    sl.api_key
            )
            SELECT
                group_by_day,
                jsonb_agg(jsonb_build_object(
                    'team_name', team_name,
                    'total_spend', total_spend,
                    'metadata', metadata
                )) AS teams
            FROM (
                SELECT
                    group_by_day,
                    team_name,
                    SUM(model_api_spend) AS total_spend,
                    jsonb_agg(jsonb_build_object(
                        'model', model,
                        'api_key', api_key,
                        'spend', model_api_spend,
                        'total_tokens', model_api_tokens
                    )) AS metadata
                FROM
                    SpendByModelApiKey
                GROUP BY
                    group_by_day,
                    team_name
            ) AS aggregated
            GROUP BY
                group_by_day
            ORDER BY
                group_by_day;
            """

            db_response = await prisma_client.db.query_raw(
                sql_query, start_date_obj, end_date_obj
            )
            if db_response is None:
                return []

            return db_response
        elif group_by == "customer":
            sql_query = """
            WITH SpendByModelApiKey AS (
                SELECT
                    date_trunc('day', sl."startTime") AS group_by_day,
                    sl.end_user AS customer,
                    sl.model,
                    sl.api_key,
                    SUM(sl.spend) AS model_api_spend,
                    SUM(sl.total_tokens) AS model_api_tokens
                FROM
                    "LiteLLM_SpendLogs" sl
                WHERE
                    sl."startTime" BETWEEN $1::date AND $2::date
                GROUP BY
                    date_trunc('day', sl."startTime"),
                    customer,
                    sl.model,
                    sl.api_key
            )
            SELECT
                group_by_day,
                jsonb_agg(jsonb_build_object(
                    'customer', customer,
                    'total_spend', total_spend,
                    'metadata', metadata
                )) AS customers
            FROM (
                SELECT
                    group_by_day,
                    customer,
                    SUM(model_api_spend) AS total_spend,
                    jsonb_agg(jsonb_build_object(
                        'model', model,
                        'api_key', api_key,
                        'spend', model_api_spend,
                        'total_tokens', model_api_tokens
                    )) AS metadata
                FROM
                    SpendByModelApiKey
                GROUP BY
                    group_by_day,
                    customer
            ) AS aggregated
            GROUP BY
                group_by_day
            ORDER BY
                group_by_day;
            """

            db_response = await prisma_client.db.query_raw(
                sql_query, start_date_obj, end_date_obj
            )
            if db_response is None:
                return []

            return db_response
except Exception as e:
raise HTTPException(
@ -8097,7 +8197,9 @@ async def _get_spend_report_for_time_range(
return response, spend_per_tag
except Exception as e:
verbose_proxy_logger.error("Exception in _get_daily_spend_reports", e) # noqa
verbose_proxy_logger.error(
"Exception in _get_daily_spend_reports {}".format(str(e))
) # noqa
@router.post(
@ -8755,7 +8857,7 @@ async def new_user(data: NewUserRequest):
- organization_id: Optional[str] - specify the org a user belongs to.
- user_email: Optional[str] - Specify a user email.
- send_invite_email: Optional[bool] - Specify if an invite email should be sent.
- user_role: Optional[str] - Specify a user role - "admin", "app_owner", "app_user"
- user_role: Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/blob/main/litellm/proxy/_types.py#L20`
- max_budget: Optional[float] - Specify max budget for a given user.
- models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
- tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute)
@ -8790,7 +8892,10 @@ async def new_user(data: NewUserRequest):
role="user",
user_email=data_json.get("user_email", None),
),
)
),
http_request=Request(
scope={"type": "http"},
),
)
if data.send_invite_email is True:
@ -9823,8 +9928,10 @@ async def delete_end_user(
dependencies=[Depends(user_api_key_auth)],
response_model=LiteLLM_TeamTable,
)
@management_endpoint_wrapper
async def new_team(
data: NewTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header(
None,
@ -10058,6 +10165,7 @@ async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
@router.post(
"/team/update", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def update_team(
data: UpdateTeamRequest,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
@ -10163,8 +10271,10 @@ async def update_team(
tags=["team management"],
dependencies=[Depends(user_api_key_auth)],
)
@management_endpoint_wrapper
async def team_member_add(
data: TeamMemberAddRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
@ -10190,10 +10300,12 @@ async def team_member_add(
raise HTTPException(status_code=400, detail={"error": "No team id passed in"})
if data.member is None:
raise HTTPException(status_code=400, detail={"error": "No member passed in"})
raise HTTPException(
status_code=400, detail={"error": "No member/members passed in"}
)
existing_team_row = await prisma_client.get_data( # type: ignore
team_id=data.team_id, table_name="team", query_type="find_unique"
existing_team_row = await prisma_client.db.litellm_teamtable.find_unique(
where={"team_id": data.team_id}
)
if existing_team_row is None:
raise HTTPException(
@ -10203,75 +10315,50 @@ async def team_member_add(
},
)
    complete_team_data = LiteLLM_TeamTable(**existing_team_row.model_dump())

    if isinstance(data.member, Member):
        # add to team db
        new_member = data.member

        complete_team_data.members_with_roles.append(new_member)

    elif isinstance(data.member, List):
        # add to team db
        new_members = data.member

        complete_team_data.members_with_roles.extend(new_members)

    # ADD MEMBER TO TEAM
    _db_team_members = [m.model_dump() for m in complete_team_data.members_with_roles]
    updated_team = await prisma_client.db.litellm_teamtable.update(
        where={"team_id": data.team_id},
        data={"members_with_roles": json.dumps(_db_team_members)},  # type: ignore
    )

    ## ADD USER, IF NEW ##
    if isinstance(data.member, Member):
        await add_new_member(
            new_member=data.member,
            max_budget_in_team=data.max_budget_in_team,
            prisma_client=prisma_client,
            user_api_key_dict=user_api_key_dict,
            litellm_proxy_admin_name=litellm_proxy_admin_name,
            team_id=data.team_id,
        )
    elif isinstance(data.member, List):
        tasks: List = []
        for m in data.member:
            await add_new_member(
                new_member=m,
                max_budget_in_team=data.max_budget_in_team,
                prisma_client=prisma_client,
                user_api_key_dict=user_api_key_dict,
                litellm_proxy_admin_name=litellm_proxy_admin_name,
                team_id=data.team_id,
            )
        await asyncio.gather(*tasks)

    return updated_team
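Illustratively, the request body this change now accepts — `member` may be a single object or a list (field names follow the `TeamMemberAddRequest` usage above; the role values are assumptions):

```python
# Sketch: /team/member_add now accepts a list of members plus an optional
# per-member budget.
payload = {
    "team_id": "my-team-id",
    "member": [
        {"role": "user", "user_id": "user-1"},            # role value assumed
        {"role": "user", "user_email": "dev@example.com"},
    ],
    "max_budget_in_team": 10.0,
}
```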
@router.post(
@ -10279,8 +10366,10 @@ async def team_member_add(
tags=["team management"],
dependencies=[Depends(user_api_key_auth)],
)
@management_endpoint_wrapper
async def team_member_delete(
data: TeamMemberDeleteRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
@ -10384,8 +10473,10 @@ async def team_member_delete(
@router.post(
"/team/delete", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def delete_team(
data: DeleteTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
litellm_changed_by: Optional[str] = Header(
None,
@ -10469,10 +10560,12 @@ async def delete_team(
@router.get(
"/team/info", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def team_info(
http_request: Request,
team_id: str = fastapi.Query(
default=None, description="Team ID in the request parameters"
)
),
):
"""
get info on team + related keys
@ -10556,8 +10649,10 @@ async def team_info(
@router.post(
"/team/block", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def block_team(
data: BlockTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
@ -10578,8 +10673,10 @@ async def block_team(
@router.post(
"/team/unblock", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def unblock_team(
data: BlockTeamRequest,
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
@ -10600,7 +10697,9 @@ async def unblock_team(
@router.get(
"/team/list", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def list_team(
http_request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
@ -13007,7 +13106,9 @@ async def auth_callback(request: Request):
user_role = getattr(result, generic_user_role_attribute_name, None)
if user_id is None:
user_id = getattr(result, "first_name", "") + getattr(result, "last_name", "")
_first_name = getattr(result, "first_name", "") or ""
_last_name = getattr(result, "last_name", "") or ""
user_id = _first_name + _last_name
user_info = None
user_id_models: List = []

View file

@ -91,7 +91,7 @@ model LiteLLM_TeamTable {
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
model_spend Json @default("{}")
model_max_budget Json @default("{}")
model_id Int? @unique
model_id Int? @unique // id for LiteLLM_ModelTable -> stores team-level model aliases
litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
litellm_model_table LiteLLM_ModelTable? @relation(fields: [model_id], references: [id])
}

View file

@ -0,0 +1,41 @@
# mypy: ignore-errors
import openai
from opentelemetry import trace
from opentelemetry.context import Context
from opentelemetry.trace import SpanKind
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
trace.set_tracer_provider(TracerProvider())
memory_exporter = InMemorySpanExporter()
span_processor = SimpleSpanProcessor(memory_exporter)
trace.get_tracer_provider().add_span_processor(span_processor)
tracer = trace.get_tracer(__name__)
# create an otel traceparent header
tracer = trace.get_tracer(__name__)
with tracer.start_as_current_span("ishaan-local-dev-app") as span:
span.set_attribute("generation_name", "ishaan-generation-openai-client")
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
extra_headers = {}
context = trace.set_span_in_context(span)
traceparent = TraceContextTextMapPropagator()
traceparent.inject(carrier=extra_headers, context=context)
print("EXTRA HEADERS: ", extra_headers)
_trace_parent = extra_headers.get("traceparent")
trace_id = _trace_parent.split("-")[1]
print("Trace ID: ", trace_id)
# # request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
model="llama3",
messages=[
{"role": "user", "content": "this is a test request, write a short poem"}
],
extra_headers=extra_headers,
)
print(response)

View file

@ -0,0 +1,21 @@
# mypy: ignore-errors
import openai
import uuid
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
example_traceparent = f"00-80e1afed08e019fc1110464cfa66635c-02e80198930058d4-01"
extra_headers = {"traceparent": example_traceparent}
_trace_id = example_traceparent.split("-")[1]
print("EXTRA HEADERS: ", extra_headers)
print("Trace ID: ", _trace_id)
response = client.chat.completions.create(
model="llama3",
messages=[
{"role": "user", "content": "this is a test request, write a short poem"}
],
extra_headers=extra_headers,
)
print(response)

View file

@ -48,6 +48,7 @@ from datetime import datetime, timedelta
from litellm.integrations.slack_alerting import SlackAlerting
from typing_extensions import overload
from functools import wraps
from fastapi import Request
if TYPE_CHECKING:
from opentelemetry.trace import Span as _Span
@ -2017,6 +2018,7 @@ def get_logging_payload(
user_api_key_team_id=None,
user_api_key_user_id=None,
user_api_key_team_alias=None,
spend_logs_metadata=None,
)
if isinstance(metadata, dict):
verbose_proxy_logger.debug(
@ -2595,36 +2597,6 @@ async def update_spend(
raise e
async def _read_request_body(request):
"""
Asynchronous function to read the request body and parse it as JSON or literal data.
Parameters:
- request: The request object to read the body from
Returns:
- dict: Parsed request data as a dictionary
"""
import ast, json
try:
request_data = {}
if request is None:
return request_data
body = await request.body()
if body == b"" or body is None:
return request_data
body_str = body.decode()
try:
request_data = ast.literal_eval(body_str)
except:
request_data = json.loads(body_str)
return request_data
except:
return {}
def _is_projected_spend_over_limit(
current_spend: float, soft_budget_limit: Optional[float]
):

View file

@ -2057,11 +2057,14 @@ class Router:
generic_fallback_idx: Optional[int] = None
## check for specific model group-specific fallbacks
for idx, item in enumerate(fallbacks):
if list(item.keys())[0] == model_group:
fallback_model_group = item[model_group]
break
elif list(item.keys())[0] == "*":
generic_fallback_idx = idx
if isinstance(item, dict):
if list(item.keys())[0] == model_group:
fallback_model_group = item[model_group]
break
elif list(item.keys())[0] == "*":
generic_fallback_idx = idx
elif isinstance(item, str):
fallback_model_group = [fallbacks.pop(idx)]
## if none, check for generic fallback
if (
fallback_model_group is None
@ -2310,13 +2313,15 @@ class Router:
verbose_router_logger.debug(f"inside model fallbacks: {fallbacks}")
fallback_model_group = None
generic_fallback_idx: Optional[int] = None
## check for specific model group-specific fallbacks
for idx, item in enumerate(fallbacks):
if list(item.keys())[0] == model_group:
fallback_model_group = item[model_group]
break
elif list(item.keys())[0] == "*":
generic_fallback_idx = idx
if isinstance(item, dict):
if list(item.keys())[0] == model_group:
fallback_model_group = item[model_group]
break
elif list(item.keys())[0] == "*":
generic_fallback_idx = idx
elif isinstance(item, str):
fallback_model_group = [fallbacks.pop(idx)]
## if none, check for generic fallback
if (
fallback_model_group is None

View file

@ -810,6 +810,28 @@ def test_vertexai_embedding():
pytest.fail(f"Error occurred: {e}")
def test_vertexai_embedding_embedding_latest():
try:
load_vertex_ai_credentials()
litellm.set_verbose = True
response = embedding(
model="vertex_ai/text-embedding-004",
input=["hi"],
dimensions=1,
auto_truncate=True,
task_type="RETRIEVAL_QUERY",
)
assert len(response.data[0]["embedding"]) == 1
assert response.usage.prompt_tokens > 0
print(f"response:", response)
except litellm.RateLimitError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
@pytest.mark.asyncio
async def test_vertexai_aembedding():
try:

View file

@ -220,13 +220,13 @@ def test_completion_bedrock_claude_sts_oidc_auth():
aws_web_identity_token = "oidc/circleci_v2/"
aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
# TODO: This is using David's IAM role, we should use Litellm's IAM role eventually
# TODO: This is using ai.moda's IAM role, we should use LiteLLM's IAM role eventually
aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci"
try:
litellm.set_verbose = True
response = completion(
response_1 = completion(
model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
messages=messages,
max_tokens=10,
@ -236,8 +236,40 @@ def test_completion_bedrock_claude_sts_oidc_auth():
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
)
# Add any assertions here to check the response
print(response)
print(response_1)
assert len(response_1.choices) > 0
assert len(response_1.choices[0].message.content) > 0
# This second call is to verify that the cache isn't breaking anything
response_2 = completion(
model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
messages=messages,
max_tokens=5,
temperature=0.2,
aws_region_name=aws_region_name,
aws_web_identity_token=aws_web_identity_token,
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
)
print(response_2)
assert len(response_2.choices) > 0
assert len(response_2.choices[0].message.content) > 0
# This third call is to verify that the cache isn't used for a different region
response_3 = completion(
model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
messages=messages,
max_tokens=6,
temperature=0.3,
aws_region_name="us-east-1",
aws_web_identity_token=aws_web_identity_token,
aws_role_name=aws_role_name,
aws_session_name="my-test-session",
)
print(response_3)
assert len(response_3.choices) > 0
assert len(response_3.choices[0].message.content) > 0
except RateLimitError:
pass
except Exception as e:
@ -255,7 +287,7 @@ def test_completion_bedrock_httpx_command_r_sts_oidc_auth():
aws_web_identity_token = "oidc/circleci_v2/"
aws_region_name = os.environ["AWS_REGION_NAME"]
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
# TODO: This is using David's IAM role, we should use Litellm's IAM role eventually
# TODO: This is using ai.moda's IAM role, we should use LiteLLM's IAM role eventually
aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci"
try:

View file

@ -16,7 +16,7 @@ from litellm.llms.prompt_templates.factory import anthropic_messages_pt
from unittest.mock import patch, MagicMock
from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
# litellm.num_retries=3
# litellm.num_retries = 3
litellm.cache = None
litellm.success_callback = []
user_message = "Write a short poem about the sky"
@ -114,6 +114,27 @@ def test_null_role_response():
assert response.choices[0].message.role == "assistant"
def test_completion_azure_ai_command_r():
try:
import os
litellm.set_verbose = True
os.environ["AZURE_AI_API_BASE"] = os.getenv("AZURE_COHERE_API_BASE", "")
os.environ["AZURE_AI_API_KEY"] = os.getenv("AZURE_COHERE_API_KEY", "")
response: litellm.ModelResponse = completion(
model="azure_ai/command-r-plus",
messages=[{"role": "user", "content": "What is the meaning of life?"}],
) # type: ignore
assert "azure_ai" in response.model
except litellm.Timeout as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_azure_command_r():
try:
litellm.set_verbose = True
@ -530,6 +551,7 @@ def test_completion_cohere_command_r_plus_function_call():
messages=messages,
tools=tools,
tool_choice="auto",
force_single_step=True,
)
print(second_response)
except Exception as e:
@ -720,7 +742,11 @@ def test_completion_claude_3_function_plus_image():
print(response)
def test_completion_azure_mistral_large_function_calling():
@pytest.mark.parametrize(
"provider",
["azure", "azure_ai"],
)
def test_completion_azure_mistral_large_function_calling(provider):
"""
This primarily tests whether the 'Function()' pydantic object correctly handles an arguments param passed in as a dict vs. a string
"""
@ -751,8 +777,9 @@ def test_completion_azure_mistral_large_function_calling():
"content": "What's the weather like in Boston today in Fahrenheit?",
}
]
response = completion(
model="azure/mistral-large-latest",
model="{}/mistral-large-latest".format(provider),
api_base=os.getenv("AZURE_MISTRAL_API_BASE"),
api_key=os.getenv("AZURE_MISTRAL_API_KEY"),
messages=messages,

View file

@ -34,14 +34,15 @@ class MyCustomHandler(CustomLogger):
self.response_cost = 0
def log_pre_api_call(self, model, messages, kwargs):
print(f"Pre-API Call")
print("Pre-API Call")
traceback.print_stack()
self.data_sent_to_api = kwargs["additional_args"].get("complete_input_dict", {})
def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
print(f"Post-API Call")
print("Post-API Call")
def log_stream_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Stream")
print("On Stream")
def log_success_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Success")
@ -372,6 +373,7 @@ async def test_async_custom_handler_embedding_optional_param():
Tests if the openai optional params for embedding - user + encoding_format,
are logged
"""
litellm.set_verbose = True
customHandler_optional_params = MyCustomHandler()
litellm.callbacks = [customHandler_optional_params]
response = await litellm.aembedding(

View file

@ -55,8 +55,12 @@ async def test_content_policy_exception_azure():
except litellm.ContentPolicyViolationError as e:
print("caught a content policy violation error! Passed")
print("exception", e)
assert e.litellm_debug_info is not None
assert isinstance(e.litellm_debug_info, str)
assert len(e.litellm_debug_info) > 0
pass
except Exception as e:
print()
pytest.fail(f"An exception occurred - {str(e)}")

View file

@ -195,6 +195,8 @@ async def test_aimage_generation_vertex_ai():
assert isinstance(d, litellm.ImageObject)
print("data in response.data", d)
assert d.b64_json is not None
except litellm.ServiceUnavailableError as e:
pass
except litellm.RateLimitError as e:
pass
except litellm.ContentPolicyViolationError:

View file

@ -16,6 +16,7 @@ from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLMRoutes
from litellm.proxy.auth.handle_jwt import JWTHandler
from litellm.caching import DualCache
from datetime import datetime, timedelta
from fastapi import Request
public_key = {
"kty": "RSA",
@ -346,6 +347,7 @@ async def test_team_token_output(prisma_client, audience):
models=["gpt-3.5-turbo", "gpt-4"],
),
user_api_key_dict=result,
http_request=Request(scope={"type": "http"}),
)
except Exception as e:
pytest.fail(f"This should not fail - {str(e)}")
@ -534,6 +536,7 @@ async def test_user_token_output(
models=["gpt-3.5-turbo", "gpt-4"],
),
user_api_key_dict=result,
http_request=Request(scope={"type": "http"}),
)
if default_team_id:
await new_team(
@ -544,6 +547,7 @@ async def test_user_token_output(
models=["gpt-3.5-turbo", "gpt-4"],
),
user_api_key_dict=result,
http_request=Request(scope={"type": "http"}),
)
except Exception as e:
pytest.fail(f"This should not fail - {str(e)}")

View file

@ -137,6 +137,7 @@ async def test_new_user_response(prisma_client):
NewTeamRequest(
team_id=_team_id,
),
http_request=Request(scope={"type": "http"}),
user_api_key_dict=UserAPIKeyAuth(
user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234",
@ -272,7 +273,7 @@ def test_call_with_invalid_key(prisma_client):
except Exception as e:
print("Got Exception", e)
print(e.message)
assert "Authentication Error, Invalid token passed" in e.message
assert "Authentication Error, Invalid proxy server token passed" in e.message
pass
@ -368,6 +369,7 @@ async def test_call_with_valid_model_using_all_models(prisma_client):
new_team_response = await new_team(
data=team_request,
user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
http_request=Request(scope={"type": "http"}),
)
print("new_team_response", new_team_response)
created_team_id = new_team_response["team_id"]
@ -471,7 +473,7 @@ def test_call_with_user_over_budget(prisma_client):
asyncio.run(test())
except Exception as e:
error_detail = e.message
assert "Authentication Error, ExceededBudget:" in error_detail
assert "Budget has been exceeded" in error_detail
print(vars(e))
@ -652,7 +654,7 @@ def test_call_with_proxy_over_budget(prisma_client):
error_detail = e.message
else:
error_detail = traceback.format_exc()
assert "Authentication Error, ExceededBudget:" in error_detail
assert "Budget has been exceeded" in error_detail
print(vars(e))
@ -730,7 +732,7 @@ def test_call_with_user_over_budget_stream(prisma_client):
asyncio.run(test())
except Exception as e:
error_detail = e.message
assert "Authentication Error, ExceededBudget:" in error_detail
assert "Budget has been exceeded" in error_detail
print(vars(e))
@ -827,7 +829,7 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
asyncio.run(test())
except Exception as e:
error_detail = e.message
assert "Authentication Error, ExceededBudget:" in error_detail
assert "Budget has been exceeded" in error_detail
print(vars(e))
@ -1086,6 +1088,7 @@ def test_generate_and_update_key(prisma_client):
api_key="sk-1234",
user_id="1234",
),
http_request=Request(scope={"type": "http"}),
)
_team_2 = "ishaan-special-team_{}".format(uuid.uuid4())
@ -1098,6 +1101,7 @@ def test_generate_and_update_key(prisma_client):
api_key="sk-1234",
user_id="1234",
),
http_request=Request(scope={"type": "http"}),
)
request = NewUserRequest(
@ -1175,7 +1179,6 @@ def test_generate_and_update_key(prisma_client):
asyncio.run(test())
except Exception as e:
print("Got Exception", e)
print(e.message)
pytest.fail(f"An exception occurred - {str(e)}")
@ -1363,7 +1366,7 @@ def test_call_with_key_over_budget(prisma_client):
error_detail = e.message
else:
error_detail = str(e)
assert "Authentication Error, ExceededTokenBudget:" in error_detail
assert "Budget has been exceeded" in error_detail
print(vars(e))
@ -1477,7 +1480,7 @@ def test_call_with_key_over_model_budget(prisma_client):
# print(f"Error - {str(e)}")
traceback.print_exc()
error_detail = e.message
assert "Authentication Error, ExceededModelBudget:" in error_detail
assert "Budget has been exceeded!" in error_detail
print(vars(e))
@ -1638,7 +1641,7 @@ async def test_call_with_key_over_budget_stream(prisma_client):
except Exception as e:
print("Got Exception", e)
error_detail = e.message
assert "Authentication Error, ExceededTokenBudget:" in error_detail
assert "Budget has been exceeded" in error_detail
print(vars(e))
@ -2051,6 +2054,7 @@ async def test_master_key_hashing(prisma_client):
api_key="sk-1234",
user_id="1234",
),
http_request=Request(scope={"type": "http"}),
)
_response = await new_user(
@ -2184,6 +2188,7 @@ async def test_create_update_team(prisma_client):
tpm_limit=20,
rpm_limit=20,
),
http_request=Request(scope={"type": "http"}),
user_api_key_dict=UserAPIKeyAuth(
user_role=LitellmUserRoles.PROXY_ADMIN,
api_key="sk-1234",
@ -2233,7 +2238,10 @@ async def test_create_update_team(prisma_client):
)
# now hit team_info
response = await team_info(team_id=_team_id)
response = await team_info(
team_id=_team_id,
http_request=Request(scope={"type": "http"}),
)
print("RESPONSE from team_info", response)

View file

@ -1059,3 +1059,53 @@ async def test_default_model_fallbacks(sync_mode, litellm_module_fallbacks):
assert isinstance(response, litellm.ModelResponse)
assert response.model is not None and response.model == "gpt-4o"
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_client_side_fallbacks_list(sync_mode):
"""
Tests Client Side Fallbacks
User can pass "fallbacks": ["gpt-3.5-turbo"] and this should work
"""
router = Router(
model_list=[
{
"model_name": "bad-model",
"litellm_params": {
"model": "openai/my-bad-model",
"api_key": "my-bad-api-key",
},
},
{
"model_name": "my-good-model",
"litellm_params": {
"model": "gpt-4o",
"api_key": os.getenv("OPENAI_API_KEY"),
},
},
],
)
if sync_mode:
response = router.completion(
model="bad-model",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
fallbacks=["my-good-model"],
mock_testing_fallbacks=True,
mock_response="Hey! nice day",
)
else:
response = await router.acompletion(
model="bad-model",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
fallbacks=["my-good-model"],
mock_testing_fallbacks=True,
mock_response="Hey! nice day",
)
assert isinstance(response, litellm.ModelResponse)
assert response.model is not None and response.model == "gpt-4o"

View file

@ -1463,6 +1463,10 @@ async def test_parallel_streaming_requests(sync_mode, model):
except RateLimitError:
pass
except litellm.InternalServerError as e:
if "predibase" in str(e).lower():
# only skip internal server error from predibase - their endpoint seems quite unstable
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
@ -2535,7 +2539,10 @@ def streaming_and_function_calling_format_tests(idx, chunk):
return extracted_chunk, finished
def test_openai_streaming_and_function_calling():
@pytest.mark.parametrize(
"model", ["gpt-3.5-turbo", "anthropic.claude-3-sonnet-20240229-v1:0"]
)
def test_streaming_and_function_calling(model):
tools = [
{
"type": "function",
@ -2556,16 +2563,21 @@ def test_openai_streaming_and_function_calling():
},
}
]
messages = [{"role": "user", "content": "What is the weather like in Boston?"}]
try:
response = completion(
model="gpt-3.5-turbo",
litellm.set_verbose = True
response: litellm.CustomStreamWrapper = completion(
model=model,
tools=tools,
messages=messages,
stream=True,
)
tool_choice="required",
) # type: ignore
# Add any assertions here to check the response
for idx, chunk in enumerate(response):
# continue
print("\n{}\n".format(chunk))
if idx == 0:
assert (
chunk.choices[0].delta.tool_calls[0].function.arguments is not None
@ -2573,6 +2585,7 @@ def test_openai_streaming_and_function_calling():
assert isinstance(
chunk.choices[0].delta.tool_calls[0].function.arguments, str
)
# assert False
except Exception as e:
pytest.fail(f"Error occurred: {e}")
raise e

View file

@ -3990,6 +3990,7 @@ def test_async_text_completion():
asyncio.run(test_get_response())
@pytest.mark.skip(reason="Tgai endpoints are unstable")
def test_async_text_completion_together_ai():
litellm.set_verbose = True
print("test_async_text_completion")

View file

@ -187,12 +187,43 @@ def test_load_test_token_counter(model):
print("model={}, total test time={}".format(model, total_time))
assert total_time < 10, f"Total encoding time > 10s, {total_time}"
def test_openai_token_with_image_and_text():
model = "gpt-4o"
full_request = {'model': 'gpt-4o', 'tools': [{'type': 'function', 'function': {'name': 'json', 'parameters': {'type': 'object', 'required': ['clause'], 'properties': {'clause': {'type': 'string'}}}, 'description': 'Respond with a JSON object.'}}], 'logprobs': False, 'messages': [{'role': 'user', 'content': [{'text': '\n Just some long text, long long text, and you know it will be longer than 7 tokens definetly.', 'type': 'text'}]}], 'tool_choice': {'type': 'function', 'function': {'name': 'json'}}, 'exclude_models': [], 'disable_fallback': False, 'exclude_providers': []}
full_request = {
"model": "gpt-4o",
"tools": [
{
"type": "function",
"function": {
"name": "json",
"parameters": {
"type": "object",
"required": ["clause"],
"properties": {"clause": {"type": "string"}},
},
"description": "Respond with a JSON object.",
},
}
],
"logprobs": False,
"messages": [
{
"role": "user",
"content": [
{
"text": "\n Just some long text, long long text, and you know it will be longer than 7 tokens definetly.",
"type": "text",
}
],
}
],
"tool_choice": {"type": "function", "function": {"name": "json"}},
"exclude_models": [],
"disable_fallback": False,
"exclude_providers": [],
}
messages = full_request.get("messages", [])
token_count = token_counter(model=model, messages=messages)
print(token_count)
test_openai_token_with_image_and_text()

View file

@ -23,6 +23,7 @@ from litellm.utils import (
create_pretrained_tokenizer,
create_tokenizer,
get_max_tokens,
get_supported_openai_params,
)
# Assuming your trim_messages, shorten_message_to_fit_limit, and get_token_count functions are all in a module named 'message_utils'
@ -386,3 +387,11 @@ def test_get_max_token_unit_test():
) # Returns a number instead of throwing an Exception
assert isinstance(max_tokens, int)
def test_get_supported_openai_params() -> None:
# Mapped provider
assert isinstance(get_supported_openai_params("gpt-4"), list)
# Unmapped provider
assert get_supported_openai_params("nonexistent") is None

View file

@ -1,5 +1,6 @@
from typing import TypedDict, Any, Union, Optional, Literal, List
import json
from .openai import ChatCompletionToolCallChunk
from typing_extensions import (
Self,
Protocol,
@ -118,6 +119,15 @@ class ToolBlockDeltaEvent(TypedDict):
input: str
class ToolUseBlockStartEvent(TypedDict):
name: str
toolUseId: str
class ContentBlockStartEvent(TypedDict, total=False):
toolUse: Optional[ToolUseBlockStartEvent]
class ContentBlockDeltaEvent(TypedDict, total=False):
"""
Either 'text' or 'toolUse' will be specified for Converse API streaming response.
@ -138,10 +148,11 @@ class RequestObject(TypedDict, total=False):
class GenericStreamingChunk(TypedDict):
text: Required[str]
tool_str: Required[str]
tool_use: Optional[ChatCompletionToolCallChunk]
is_finished: Required[bool]
finish_reason: Required[str]
usage: Optional[ConverseTokenUsageBlock]
index: int
class Document(TypedDict):

View file

@ -296,14 +296,27 @@ class ListBatchRequest(TypedDict, total=False):
class ChatCompletionToolCallFunctionChunk(TypedDict):
name: str
name: Optional[str]
arguments: str
class ChatCompletionToolCallChunk(TypedDict):
id: Optional[str]
type: Literal["function"]
function: ChatCompletionToolCallFunctionChunk
class ChatCompletionDeltaToolCallChunk(TypedDict):
id: str
type: Literal["function"]
function: ChatCompletionToolCallFunctionChunk
index: int
class ChatCompletionDeltaChunk(TypedDict, total=False):
content: Optional[str]
tool_calls: List[ChatCompletionDeltaToolCallChunk]
role: str
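An illustrative literal showing how the new delta-chunk types compose (values invented; import path assumed from the relative imports used elsewhere in this diff):

```python
# Sketch: a streaming tool-call delta shaped by the TypedDicts above.
from litellm.types.llms.openai import ChatCompletionDeltaChunk  # path assumed

delta: ChatCompletionDeltaChunk = {
    "role": "assistant",
    "tool_calls": [
        {
            "id": "call_abc123",
            "type": "function",
            "index": 0,
            "function": {"name": "get_weather", "arguments": '{"city": "Boston"}'},
        }
    ],
}
```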
class ChatCompletionResponseMessage(TypedDict, total=False):

View file

@ -30,7 +30,7 @@ from dataclasses import (
dataclass,
field,
)
import os
import litellm._service_logger # for storing API inputs, outputs, and metadata
from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
from litellm.caching import DualCache
@ -49,9 +49,9 @@ except (ImportError, AttributeError):
filename = pkg_resources.resource_filename(__name__, "llms/tokenizers")
os.environ["TIKTOKEN_CACHE_DIR"] = (
filename # use local copy of tiktoken b/c of - https://github.com/BerriAI/litellm/issues/1071
)
os.environ["TIKTOKEN_CACHE_DIR"] = os.getenv(
"CUSTOM_TIKTOKEN_CACHE_DIR", filename
) # use local copy of tiktoken b/c of - https://github.com/BerriAI/litellm/issues/1071
encoding = tiktoken.get_encoding("cl100k_base")
from importlib import resources
@ -63,6 +63,11 @@ claude_json_str = json.dumps(json_data)
import importlib.metadata
from ._logging import verbose_logger
from .types.router import LiteLLM_Params
from .types.llms.openai import (
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionDeltaToolCallChunk,
)
from .integrations.traceloop import TraceloopLogger
from .integrations.athina import AthinaLogger
from .integrations.helicone import HeliconeLogger
@ -933,7 +938,6 @@ class TextCompletionResponse(OpenAIObject):
object=None,
**params,
):
if stream:
object = "text_completion.chunk"
choices = [TextChoices()]
@ -942,7 +946,6 @@ class TextCompletionResponse(OpenAIObject):
if choices is not None and isinstance(choices, list):
new_choices = []
for choice in choices:
if isinstance(choice, TextChoices):
_new_choice = choice
elif isinstance(choice, dict):
@ -1018,7 +1021,6 @@ class ImageObject(OpenAIObject):
revised_prompt: Optional[str] = None
def __init__(self, b64_json=None, url=None, revised_prompt=None):
super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt)
def __contains__(self, key):
@ -1342,28 +1344,29 @@ class Logging:
)
else:
verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n")
        # log raw request to provider (like LangFuse) -- if opted in.
        if litellm.log_raw_request_response is True:
            try:
                # [Non-blocking Extra Debug Information in metadata]
                _litellm_params = self.model_call_details.get("litellm_params", {})
                _metadata = _litellm_params.get("metadata", {}) or {}
                if (
                    litellm.turn_off_message_logging is not None
                    and litellm.turn_off_message_logging is True
                ):
                    _metadata["raw_request"] = (
                        "redacted by litellm. \
                        'litellm.turn_off_message_logging=True'"
                    )
                else:
                    _metadata["raw_request"] = str(curl_command)
            except Exception as e:
                _metadata["raw_request"] = (
                    "Unable to Log \
                    raw request: {}".format(
                        str(e)
                    )
                )
if self.logger_fn and callable(self.logger_fn):
try:
self.logger_fn(
@ -1621,7 +1624,6 @@ class Logging:
end_time=end_time,
)
except Exception as e:
complete_streaming_response = None
else:
self.sync_streaming_chunks.append(result)
@ -2391,7 +2393,6 @@ class Logging:
"async_complete_streaming_response"
in self.model_call_details
):
await customLogger.async_log_event(
kwargs=self.model_call_details,
response_obj=self.model_call_details[
@ -2730,7 +2731,7 @@ class Logging:
only redacts when litellm.turn_off_message_logging == True
"""
# check if user opted out of logging message/response to callbacks
if litellm.turn_off_message_logging == True:
if litellm.turn_off_message_logging is True:
# remove messages, prompts, input, response from logging
self.model_call_details["messages"] = [
{"role": "user", "content": "redacted-by-litellm"}
@ -3250,7 +3251,7 @@ def client(original_function):
stream=kwargs.get("stream", False),
)
if kwargs.get("stream", False) == True:
if kwargs.get("stream", False) is True:
cached_result = CustomStreamWrapper(
completion_stream=cached_result,
model=model,
@ -4030,7 +4031,10 @@ def openai_token_counter(
"""
print_verbose(f"LiteLLM: Utils - Counting tokens for OpenAI model={model}")
try:
encoding = tiktoken.encoding_for_model(model)
if "gpt-4o" in model:
encoding = tiktoken.get_encoding("o200k_base")
else:
encoding = tiktoken.encoding_for_model(model)
except KeyError:
print_verbose("Warning: model not found. Using cl100k_base encoding.")
encoding = tiktoken.get_encoding("cl100k_base")
@ -4894,6 +4898,18 @@ def get_optional_params_embeddings(
)
final_params = {**optional_params, **kwargs}
return final_params
if custom_llm_provider == "vertex_ai":
supported_params = get_supported_openai_params(
model=model,
custom_llm_provider="vertex_ai",
request_type="embeddings",
)
_check_valid_arg(supported_params=supported_params)
optional_params = litellm.VertexAITextEmbeddingConfig().map_openai_params(
non_default_params=non_default_params, optional_params={}
)
final_params = {**optional_params, **kwargs}
return final_params
if custom_llm_provider == "vertex_ai":
if len(non_default_params.keys()) > 0:
if litellm.drop_params is True: # drop the unsupported non-default values
@ -4927,7 +4943,18 @@ def get_optional_params_embeddings(
message=f"Setting user/encoding format is not supported by {custom_llm_provider}. To drop it from the call, set `litellm.drop_params = True`.",
)
return {**non_default_params, **kwargs}
if custom_llm_provider == "mistral":
supported_params = get_supported_openai_params(
model=model,
custom_llm_provider="mistral",
request_type="embeddings",
)
_check_valid_arg(supported_params=supported_params)
optional_params = litellm.MistralEmbeddingConfig().map_openai_params(
non_default_params=non_default_params, optional_params={}
)
final_params = {**optional_params, **kwargs}
return final_params
if (
custom_llm_provider != "openai"
and custom_llm_provider != "azure"
@ -6166,13 +6193,16 @@ def get_api_base(
if litellm.model_alias_map and model in litellm.model_alias_map:
model = litellm.model_alias_map[model]
try:
model, custom_llm_provider, dynamic_api_key, dynamic_api_base = (
get_llm_provider(
model=model,
custom_llm_provider=_optional_params.custom_llm_provider,
api_base=_optional_params.api_base,
api_key=_optional_params.api_key,
)
(
model,
custom_llm_provider,
dynamic_api_key,
dynamic_api_base,
) = get_llm_provider(
model=model,
custom_llm_provider=_optional_params.custom_llm_provider,
api_base=_optional_params.api_base,
api_key=_optional_params.api_key,
)
except Exception as e:
verbose_logger.debug("Error occurred in getting api base - {}".format(str(e)))
@ -6220,7 +6250,7 @@ def get_first_chars_messages(kwargs: dict) -> str:
def get_supported_openai_params(
model: str,
custom_llm_provider: str,
custom_llm_provider: Optional[str] = None,
request_type: Literal["chat_completion", "embeddings"] = "chat_completion",
) -> Optional[list]:
"""
@ -6235,6 +6265,11 @@ def get_supported_openai_params(
- List if custom_llm_provider is mapped
- None if unmapped
"""
if not custom_llm_provider:
try:
custom_llm_provider = litellm.get_llm_provider(model=model)[1]
except BadRequestError:
return None
if custom_llm_provider == "bedrock":
return litellm.AmazonConverseConfig().get_supported_openai_params(model=model)
elif custom_llm_provider == "ollama":
@ -6328,7 +6363,10 @@ def get_supported_openai_params(
"max_retries",
]
elif custom_llm_provider == "mistral":
return litellm.MistralConfig().get_supported_openai_params()
if request_type == "chat_completion":
return litellm.MistralConfig().get_supported_openai_params()
elif request_type == "embeddings":
return litellm.MistralEmbeddingConfig().get_supported_openai_params()
elif custom_llm_provider == "replicate":
return [
"stream",
@ -6370,7 +6408,10 @@ def get_supported_openai_params(
elif custom_llm_provider == "palm" or custom_llm_provider == "gemini":
return ["temperature", "top_p", "stream", "n", "stop", "max_tokens"]
elif custom_llm_provider == "vertex_ai":
return litellm.VertexAIConfig().get_supported_openai_params()
if request_type == "chat_completion":
return litellm.VertexAIConfig().get_supported_openai_params()
elif request_type == "embeddings":
return litellm.VertexAITextEmbeddingConfig().get_supported_openai_params()
elif custom_llm_provider == "sagemaker":
return ["stream", "temperature", "max_tokens", "top_p", "stop", "n"]
elif custom_llm_provider == "aleph_alpha":
@ -6577,6 +6618,9 @@ def get_llm_provider(
or get_secret("FIREWORKSAI_API_KEY")
or get_secret("FIREWORKS_AI_TOKEN")
)
elif custom_llm_provider == "azure_ai":
api_base = api_base or get_secret("AZURE_AI_API_BASE") # type: ignore
dynamic_api_key = api_key or get_secret("AZURE_AI_API_KEY")
elif custom_llm_provider == "mistral":
# mistral is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.mistral.ai
api_base = (
@ -7458,7 +7502,6 @@ def validate_environment(model: Optional[str] = None) -> dict:
def set_callbacks(callback_list, function_id=None):
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger
try:
@ -8767,6 +8810,13 @@ def exception_type(
response=original_exception.response,
litellm_debug_info=extra_information,
)
if "Request failed during generation" in error_str:
# this is an internal server error from predibase
raise litellm.InternalServerError(
message=f"PredibaseException - {error_str}",
llm_provider="predibase",
model=model,
)
elif hasattr(original_exception, "status_code"):
if original_exception.status_code == 500:
exception_mapping_worked = True
@ -9085,7 +9135,7 @@ def exception_type(
):
exception_mapping_worked = True
raise RateLimitError(
message=f"VertexAIException RateLimitError - {error_str}",
message=f"litellm.RateLimitError: VertexAIException - {error_str}",
model=model,
llm_provider="vertex_ai",
litellm_debug_info=extra_information,
@ -9097,7 +9147,14 @@ def exception_type(
),
),
)
elif "500 Internal Server Error" in error_str:
exception_mapping_worked = True
raise ServiceUnavailableError(
message=f"litellm.ServiceUnavailableError: VertexAIException - {error_str}",
model=model,
llm_provider="vertex_ai",
litellm_debug_info=extra_information,
)
if hasattr(original_exception, "status_code"):
if original_exception.status_code == 400:
exception_mapping_worked = True
@ -10048,6 +10105,14 @@ def get_secret(
return oidc_token
else:
raise ValueError("Github OIDC provider failed")
elif oidc_provider == "azure":
# https://azure.github.io/azure-workload-identity/docs/quick-start.html
azure_federated_token_file = os.getenv("AZURE_FEDERATED_TOKEN_FILE")
if azure_federated_token_file is None:
raise ValueError("AZURE_FEDERATED_TOKEN_FILE not found in environment")
with open(azure_federated_token_file, "r") as f:
oidc_token = f.read()
return oidc_token
else:
raise ValueError("Unsupported OIDC provider")
@ -11311,7 +11376,6 @@ class CustomStreamWrapper:
raise StopIteration
response_obj: GenericStreamingChunk = chunk
completion_obj["content"] = response_obj["text"]
if response_obj["is_finished"]:
self.received_finish_reason = response_obj["finish_reason"]
@ -11326,6 +11390,10 @@ class CustomStreamWrapper:
completion_tokens=response_obj["usage"]["outputTokens"],
total_tokens=response_obj["usage"]["totalTokens"],
)
if "tool_use" in response_obj and response_obj["tool_use"] is not None:
completion_obj["tool_calls"] = [response_obj["tool_use"]]
elif self.custom_llm_provider == "sagemaker":
print_verbose(f"ENTERS SAGEMAKER STREAMING for chunk {chunk}")
response_obj = self.handle_sagemaker_stream(chunk)
@ -11342,7 +11410,6 @@ class CustomStreamWrapper:
new_chunk = self.completion_stream[:chunk_size]
completion_obj["content"] = new_chunk
self.completion_stream = self.completion_stream[chunk_size:]
time.sleep(0.05)
elif self.custom_llm_provider == "palm":
# fake streaming
response_obj = {}
@ -11355,7 +11422,6 @@ class CustomStreamWrapper:
new_chunk = self.completion_stream[:chunk_size]
completion_obj["content"] = new_chunk
self.completion_stream = self.completion_stream[chunk_size:]
time.sleep(0.05)
elif self.custom_llm_provider == "ollama":
response_obj = self.handle_ollama_stream(chunk)
completion_obj["content"] = response_obj["text"]
@ -11442,7 +11508,7 @@ class CustomStreamWrapper:
# for azure, we need to pass the model from the orignal chunk
self.model = chunk.model
response_obj = self.handle_openai_chat_completion_chunk(chunk)
if response_obj == None:
if response_obj is None:
return
completion_obj["content"] = response_obj["text"]
print_verbose(f"completion obj content: {completion_obj['content']}")
@ -11575,7 +11641,7 @@ class CustomStreamWrapper:
else:
if (
self.stream_options is not None
and self.stream_options["include_usage"] == True
and self.stream_options["include_usage"] is True
):
return model_response
return
@ -11600,8 +11666,14 @@ class CustomStreamWrapper:
return model_response
elif (
"content" in completion_obj
and isinstance(completion_obj["content"], str)
and len(completion_obj["content"]) > 0
and (
isinstance(completion_obj["content"], str)
and len(completion_obj["content"]) > 0
)
or (
"tool_calls" in completion_obj
and len(completion_obj["tool_calls"]) > 0
)
): # cannot set content of an OpenAI Object to be an empty string
hold, model_response_str = self.check_special_tokens(
chunk=completion_obj["content"],
@ -11657,7 +11729,7 @@ class CustomStreamWrapper:
else:
## else
completion_obj["content"] = model_response_str
if self.sent_first_chunk == False:
if self.sent_first_chunk is False:
completion_obj["role"] = "assistant"
self.sent_first_chunk = True
model_response.choices[0].delta = Delta(**completion_obj)
@ -11666,7 +11738,7 @@ class CustomStreamWrapper:
else:
return
elif self.received_finish_reason is not None:
if self.sent_last_chunk == True:
if self.sent_last_chunk is True:
raise StopIteration
# flush any remaining holding chunk
if len(self.holding_chunk) > 0:

View file

@ -1387,6 +1387,26 @@
"mode": "image_generation",
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"text-embedding-004": {
"max_tokens": 3072,
"max_input_tokens": 3072,
"output_vector_size": 768,
"input_cost_per_token": 0.00000000625,
"output_cost_per_token": 0,
"litellm_provider": "vertex_ai-embedding-models",
"mode": "embedding",
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
},
"text-multilingual-embedding-002": {
"max_tokens": 2048,
"max_input_tokens": 2048,
"output_vector_size": 768,
"input_cost_per_token": 0.00000000625,
"output_cost_per_token": 0,
"litellm_provider": "vertex_ai-embedding-models",
"mode": "embedding",
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
},
"textembedding-gecko": {
"max_tokens": 3072,
"max_input_tokens": 3072,

poetry.lock generated
View file

@ -1545,6 +1545,53 @@ files = [
{file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"},
]
[[package]]
name = "mypy"
version = "1.10.0"
description = "Optional static typing for Python"
optional = false
python-versions = ">=3.8"
files = [
{file = "mypy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2"},
{file = "mypy-1.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99"},
{file = "mypy-1.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e36fb078cce9904c7989b9693e41cb9711e0600139ce3970c6ef814b6ebc2b2"},
{file = "mypy-1.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2b0695d605ddcd3eb2f736cd8b4e388288c21e7de85001e9f85df9187f2b50f9"},
{file = "mypy-1.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:cd777b780312ddb135bceb9bc8722a73ec95e042f911cc279e2ec3c667076051"},
{file = "mypy-1.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3be66771aa5c97602f382230165b856c231d1277c511c9a8dd058be4784472e1"},
{file = "mypy-1.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8b2cbaca148d0754a54d44121b5825ae71868c7592a53b7292eeb0f3fdae95ee"},
{file = "mypy-1.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ec404a7cbe9fc0e92cb0e67f55ce0c025014e26d33e54d9e506a0f2d07fe5de"},
{file = "mypy-1.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e22e1527dc3d4aa94311d246b59e47f6455b8729f4968765ac1eacf9a4760bc7"},
{file = "mypy-1.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:a87dbfa85971e8d59c9cc1fcf534efe664d8949e4c0b6b44e8ca548e746a8d53"},
{file = "mypy-1.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a781f6ad4bab20eef8b65174a57e5203f4be627b46291f4589879bf4e257b97b"},
{file = "mypy-1.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b808e12113505b97d9023b0b5e0c0705a90571c6feefc6f215c1df9381256e30"},
{file = "mypy-1.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f55583b12156c399dce2df7d16f8a5095291354f1e839c252ec6c0611e86e2e"},
{file = "mypy-1.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4cf18f9d0efa1b16478c4c129eabec36148032575391095f73cae2e722fcf9d5"},
{file = "mypy-1.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:bc6ac273b23c6b82da3bb25f4136c4fd42665f17f2cd850771cb600bdd2ebeda"},
{file = "mypy-1.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9fd50226364cd2737351c79807775136b0abe084433b55b2e29181a4c3c878c0"},
{file = "mypy-1.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f90cff89eea89273727d8783fef5d4a934be2fdca11b47def50cf5d311aff727"},
{file = "mypy-1.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcfc70599efde5c67862a07a1aaf50e55bce629ace26bb19dc17cece5dd31ca4"},
{file = "mypy-1.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:075cbf81f3e134eadaf247de187bd604748171d6b79736fa9b6c9685b4083061"},
{file = "mypy-1.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:3f298531bca95ff615b6e9f2fc0333aae27fa48052903a0ac90215021cdcfa4f"},
{file = "mypy-1.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fa7ef5244615a2523b56c034becde4e9e3f9b034854c93639adb667ec9ec2976"},
{file = "mypy-1.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3236a4c8f535a0631f85f5fcdffba71c7feeef76a6002fcba7c1a8e57c8be1ec"},
{file = "mypy-1.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a2b5cdbb5dd35aa08ea9114436e0d79aceb2f38e32c21684dcf8e24e1e92821"},
{file = "mypy-1.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92f93b21c0fe73dc00abf91022234c79d793318b8a96faac147cd579c1671746"},
{file = "mypy-1.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:28d0e038361b45f099cc086d9dd99c15ff14d0188f44ac883010e172ce86c38a"},
{file = "mypy-1.10.0-py3-none-any.whl", hash = "sha256:f8c083976eb530019175aabadb60921e73b4f45736760826aa1689dda8208aee"},
{file = "mypy-1.10.0.tar.gz", hash = "sha256:3d087fcbec056c4ee34974da493a826ce316947485cef3901f511848e687c131"},
]
[package.dependencies]
mypy-extensions = ">=1.0.0"
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
typing-extensions = ">=4.1.0"
[package.extras]
dmypy = ["psutil (>=4.0)"]
install-types = ["pip"]
mypyc = ["setuptools (>=50)"]
reports = ["lxml"]
[[package]]
name = "mypy-extensions"
version = "1.0.0"
@ -2127,6 +2174,7 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@ -3150,4 +3198,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0, !=3.9.7"
content-hash = "6a37992b63b11d254f5f40687bd96898b1d9515728f663f30dcc81c4ef8df7b7"
content-hash = "73054c657782120d170dc168ef07b494a916f1f810ff9c2b0ac878bd857a9dac"

View file

@ -85,6 +85,9 @@ model_list:
litellm_params:
model: openai/*
api_key: os.environ/OPENAI_API_KEY
- model_name: mistral-embed
litellm_params:
model: mistral/mistral-embed
- model_name: gpt-instruct # [PROD TEST] - tests if `/health` automatically infers this to be a text completion model
litellm_params:
model: text-completion-openai/gpt-3.5-turbo-instruct
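With the `mistral-embed` entry above (and `MISTRAL_API_KEY` set in the proxy's environment), embedding requests route through the same OpenAI-compatible `/embeddings` endpoint. A sketch using the defaults seen elsewhere in this diff (`http://0.0.0.0:4000`, master key `sk-1234`):

```python
# Sketch: call the proxy's /embeddings route with the new mistral-embed alias.
# The base_url and api_key below assume the local proxy defaults.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.embeddings.create(
    model="mistral-embed",
    input=["hello world"],
)
print(len(response.data[0].embedding))  # vector length returned by Mistral
```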

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.40.8"
version = "1.40.9"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -19,7 +19,7 @@ documentation = "https://docs.litellm.ai"
python = ">=3.8.1,<4.0, !=3.9.7"
openai = ">=1.27.0"
python-dotenv = ">=0.2.0"
tiktoken = ">=0.4.0"
tiktoken = ">=0.7.0"
importlib-metadata = ">=6.8.0"
tokenizers = "*"
click = "*"
@ -76,6 +76,7 @@ litellm = 'litellm:run_server'
[tool.poetry.group.dev.dependencies]
flake8 = "^6.1.0"
black = "^23.12.0"
mypy = "^1.0"
pytest = "^7.4.3"
pytest-mock = "^3.12.0"
@ -84,7 +85,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.40.8"
version = "1.40.9"
version_files = [
"pyproject.toml:^version"
]
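Two dependency changes here: `mypy` joins the dev group, and the `tiktoken` floor moves from 0.4.0 to 0.7.0. The bump matters because 0.7.0 is the first release shipping the `o200k_base` encoding used by `gpt-4o`, so usage-based token counting stays accurate for newer models. A quick sanity check, assuming stock `tiktoken` APIs:

```python
# Sketch: confirm tiktoken >= 0.7.0 resolves gpt-4o's encoding.
import tiktoken

enc = tiktoken.encoding_for_model("gpt-4o")      # o200k_base, new in 0.7.0
print(enc.name)                                  # "o200k_base"
print(len(enc.encode("hello who are you")))      # tokens counted for usage
```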

View file

@ -34,7 +34,7 @@ opentelemetry-exporter-otlp==1.25.0
### LITELLM PACKAGE DEPENDENCIES
python-dotenv==1.0.0 # for env
tiktoken==0.6.0 # for calculating usage
tiktoken==0.7.0 # for calculating usage
importlib-metadata==6.8.0 # for random utils
tokenizers==0.14.0 # for calculating usage
click==8.1.7 # for proxy cli

View file

@ -91,7 +91,7 @@ model LiteLLM_TeamTable {
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
model_spend Json @default("{}")
model_max_budget Json @default("{}")
model_id Int? @unique
model_id Int? @unique // id for LiteLLM_ModelTable -> stores team-level model aliases
litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
litellm_model_table LiteLLM_ModelTable? @relation(fields: [model_id], references: [id])
}
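The new comment pins down the relation: `model_id` points at `LiteLLM_ModelTable`, which stores team-level model aliases. A hedged sketch of following that relation with prisma-client-py (the lowercase accessor names are assumptions based on how the client is generated from this schema):

```python
# Sketch: resolve a team's model-alias row via the model_id relation.
# Accessor names (litellm_teamtable, litellm_model_table) are assumed
# from prisma-client-py's code generation for this schema.
from prisma import Prisma

async def get_team_model_aliases(team_id: str):
    db = Prisma()
    await db.connect()
    team = await db.litellm_teamtable.find_unique(
        where={"team_id": team_id},
        include={"litellm_model_table": True},  # joins LiteLLM_ModelTable
    )
    await db.disconnect()
    return team.litellm_model_table if team else None
```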

View file

@ -664,7 +664,7 @@ async def test_key_crossing_budget():
response = await chat_completion(session=session, key=key)
pytest.fail("Should have failed - Key crossed it's budget")
except Exception as e:
assert "ExceededTokenBudget: Current spend for token:" in str(e)
assert "Budget has been exceeded!" in str(e)
@pytest.mark.skip(reason="AWS Suspended Account")

View file

@ -22,6 +22,7 @@ async def generate_key(
"text-embedding-ada-002",
"dall-e-2",
"fake-openai-endpoint-2",
"mistral-embed",
],
):
url = "http://0.0.0.0:4000/key/generate"
@ -197,14 +198,14 @@ async def completion(session, key):
return response
async def embeddings(session, key):
async def embeddings(session, key, model="text-embedding-ada-002"):
url = "http://0.0.0.0:4000/embeddings"
headers = {
"Authorization": f"Bearer {key}",
"Content-Type": "application/json",
}
data = {
"model": "text-embedding-ada-002",
"model": model,
"input": ["hello world"],
}
@ -408,6 +409,9 @@ async def test_embeddings():
key_2 = key_gen["key"]
await embeddings(session=session, key=key_2)
# embedding request with non OpenAI model
await embeddings(session=session, key=key, model="mistral-embed")
@pytest.mark.asyncio
async def test_image_generation():

View file

@ -49,7 +49,7 @@ async def new_user(
async def add_member(
session, i, team_id, user_id=None, user_email=None, max_budget=None
session, i, team_id, user_id=None, user_email=None, max_budget=None, members=None
):
url = "http://0.0.0.0:4000/team/member_add"
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
@ -58,10 +58,13 @@ async def add_member(
data["member"]["user_email"] = user_email
elif user_id is not None:
data["member"]["user_id"] = user_id
elif members is not None:
data["member"] = members
if max_budget is not None:
data["max_budget_in_team"] = max_budget
print("sent data: {}".format(data))
async with session.post(url, headers=headers, json=data) as response:
status = response.status
response_text = await response.text()
@ -339,7 +342,7 @@ async def test_team_info():
async def test_team_update_sc_2():
"""
- Create team
- Add 1 user (doesn't exist in db)
- Add 10 users (don't exist in db)
- Change team alias
- Check if it works
- Assert team object unchanged besides team alias
@ -353,15 +356,20 @@ async def test_team_update_sc_2():
{"role": "admin", "user_id": admin_user},
]
team_data = await new_team(session=session, i=0, member_list=member_list)
## Create new normal user
new_normal_user = f"krrish_{uuid.uuid4()}@berri.ai"
## Create 10 normal users
members = [
{"role": "user", "user_id": f"krrish_{uuid.uuid4()}@berri.ai"}
for _ in range(10)
]
await add_member(
session=session,
i=0,
team_id=team_data["team_id"],
user_id=None,
user_email=new_normal_user,
session=session, i=0, team_id=team_data["team_id"], members=members
)
## ASSERT TEAM SIZE
team_info = await get_team_info(
session=session, get_team=team_data["team_id"], call_key="sk-1234"
)
assert len(team_info["team_info"]["members_with_roles"]) == 12
## CHANGE TEAM ALIAS
@ -570,4 +578,4 @@ async def test_users_in_team_budget():
except Exception as e:
print("got exception, this is expected")
print(e)
assert "Crossed spend within team" in str(e)
assert "Budget has been exceeded" in str(e)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[45980,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-17b0c91edd3a24fe.js\",\"931\",\"static/chunks/app/page-d61796ff0d3a8faf.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"tghLG7_IS7i5OkQJRvCIl\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[45980,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-17b0c91edd3a24fe.js\",\"931\",\"static/chunks/app/page-bd882aee817406ff.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"48nWsJi-LJrUlOLzcK-Yz\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[45980,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-17b0c91edd3a24fe.js","931","static/chunks/app/page-d61796ff0d3a8faf.js"],""]
3:I[45980,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-17b0c91edd3a24fe.js","931","static/chunks/app/page-bd882aee817406ff.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

File diff suppressed because one or more lines are too long

View file

@ -2,6 +2,6 @@
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-17b0c91edd3a24fe.js","418","static/chunks/app/model_hub/page-4cb65c32467214b5.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

File diff suppressed because one or more lines are too long

View file

@ -2,6 +2,6 @@
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-17b0c91edd3a24fe.js","461","static/chunks/app/onboarding/page-664c7288e11fff5a.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -119,9 +119,24 @@ const ChatUI: React.FC<ChatUIProps> = ({
// Now, 'options' contains the list you wanted
console.log(options); // You can log it to verify the list
// setModelInfo(options) should be inside the if block to avoid setting it when no data is available
setModelInfo(options);
// if options.length > 0, only store unique values
if (options.length > 0) {
const uniqueModels = Array.from(new Set(options));
console.log("Unique models:", uniqueModels);
// sort uniqueModels alphabetically
uniqueModels.sort((a: any, b: any) => a.label.localeCompare(b.label));
console.log("Model info:", modelInfo);
// setModelInfo(options) should be inside the if block to avoid setting it when no data is available
setModelInfo(uniqueModels);
}
setSelectedModel(fetchedAvailableModels.data[0].id);
}
} catch (error) {

View file

@ -1130,7 +1130,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
setSelectedAPIKey(key);
}}
>
{key["key_alias"]} (Enterpise only Feature)
{key["key_alias"]} (Enterprise only Feature)
</SelectItem>
);
}
@ -1165,7 +1165,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
setSelectedCustomer(user);
}}
>
{user} (Enterpise only Feature)
{user} (Enterprise only Feature)
</SelectItem>
);
})

View file

@ -114,7 +114,7 @@ const Navbar: React.FC<NavbarProps> = ({
textDecoration: "underline",
}}
>
Get enterpise license
Get enterprise license
</a>
</div>
) : null}

View file

@ -832,7 +832,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
// @ts-ignore
disabled={true}
>
{tag} (Enterpise only Feature)
{tag} (Enterprise only Feature)
</SelectItem>
);
})}