forked from phoenix/litellm-mirror
Merge branch 'main' into feat/friendliai
commit 776c75c1e5
99 changed files with 202794 additions and 632 deletions
@@ -202,6 +202,7 @@ jobs:
-e REDIS_PORT=$REDIS_PORT \
-e AZURE_FRANCE_API_KEY=$AZURE_FRANCE_API_KEY \
-e AZURE_EUROPE_API_KEY=$AZURE_EUROPE_API_KEY \
-e MISTRAL_API_KEY=$MISTRAL_API_KEY \
-e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-e AWS_REGION_NAME=$AWS_REGION_NAME \
@@ -150,7 +150,7 @@ $ litellm --config /path/to/config.yaml

```bash
curl "http://0.0.0.0:4000/v1/assistants?order=desc&limit=20" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234"
```

**Create a Thread**

@@ -162,6 +162,14 @@ curl http://0.0.0.0:4000/v1/threads \
  -d ''
```

**Get a Thread**

```bash
curl http://0.0.0.0:4000/v1/threads/{thread_id} \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234"
```

**Add Messages to the Thread**

```bash
@@ -212,6 +212,94 @@ If you run the code two times, response1 will use the cache from the first run t

</TabItem>

</Tabs>

## Switch Cache On / Off Per LiteLLM Call

LiteLLM supports 4 cache-controls:

- `no-cache`: *Optional(bool)* When `True`, will not return a cached response; the actual endpoint is called instead.
- `no-store`: *Optional(bool)* When `True`, will not cache the response.
- `ttl`: *Optional(int)* Will cache the response for the user-defined amount of time (in seconds).
- `s-maxage`: *Optional(int)* Will only accept cached responses that are within the user-defined age range (in seconds).

[Let us know if you need more](https://github.com/BerriAI/litellm/issues/1218)

<Tabs>
<TabItem value="no-cache" label="No-Cache">

Example usage `no-cache` - when `True`, will not return a cached response

```python
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "hello who are you"
        }
    ],
    cache={"no-cache": True},
)
```

</TabItem>

<TabItem value="no-store" label="No-Store">

Example usage `no-store` - when `True`, will not cache the response.

```python
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "hello who are you"
        }
    ],
    cache={"no-store": True},
)
```

</TabItem>

<TabItem value="ttl" label="ttl">

Example usage `ttl` - cache the response for 10 seconds

```python
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "hello who are you"
        }
    ],
    cache={"ttl": 10},
)
```

</TabItem>

<TabItem value="s-maxage" label="s-maxage">

Example usage `s-maxage` - only accept cached responses that are at most 60 seconds old

```python
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "hello who are you"
        }
    ],
    cache={"s-maxage": 60},
)
```

</TabItem>

</Tabs>

## Cache Context Manager - Enable, Disable, Update Cache
docs/my-website/docs/observability/raw_request_response.md (new file, 46 lines)

@@ -0,0 +1,46 @@
import Image from '@theme/IdealImage';

# Raw Request/Response Logging

See the raw request/response sent by LiteLLM in your logging provider (OTEL/Langfuse/etc.).

**on SDK**

```python
# pip install langfuse
import litellm
import os

# log raw request/response
litellm.log_raw_request_response = True

# from https://cloud.langfuse.com/
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["LANGFUSE_HOST"] = "" # optional, defaults to https://cloud.langfuse.com

# LLM API Keys
os.environ['OPENAI_API_KEY'] = ""

# set langfuse as a callback, litellm will send the data to langfuse
litellm.success_callback = ["langfuse"]

# openai call
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "user", "content": "Hi 👋 - i'm openai"}
    ]
)
```

**on Proxy**

```yaml
litellm_settings:
  log_raw_request_response: True
```

**Expected Log**

<Image img={require('../../img/raw_request_log.png')}/>
@@ -1,3 +1,5 @@
# llmcord.py

llmcord.py lets you and your friends chat with LLMs directly in your Discord server. It works with practically any LLM, remote or locally hosted.

Github: https://github.com/jakobdylanc/discord-llm-chatbot
@@ -11,7 +11,7 @@ LiteLLM supports

:::info

The Anthropic API fails requests when `max_tokens` is not passed, so litellm passes `max_tokens=4096` when no `max_tokens` value is given.

:::
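For reference, a minimal sketch of overriding that default by passing `max_tokens` yourself; the model name and token limit here are illustrative:

```python
import os
from litellm import completion

os.environ["ANTHROPIC_API_KEY"] = ""  # your Anthropic key

# explicitly cap the response length instead of relying on litellm's 4096-token default
response = completion(
    model="anthropic/claude-3-opus-20240229",  # illustrative model name
    messages=[{"role": "user", "content": "Summarize Hamlet in two sentences."}],
    max_tokens=512,  # forwarded to the Anthropic API as-is
)
print(response)
```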
@@ -229,17 +229,6 @@ assert isinstance(

```

(This hunk removes the "### Setting `anthropic-beta` Header in Requests" subsection, which said to pass `extra_headers` to litellm so all headers are forwarded to the Anthropic API, along with its example.)

### Forcing Anthropic Tool Use
@@ -3,53 +3,155 @@ import TabItem from '@theme/TabItem';

# Azure AI Studio

LiteLLM supports all models on Azure AI Studio.

## Usage

<Tabs>
<TabItem value="sdk" label="SDK">

### ENV VAR
```python
import os
os.environ["AZURE_AI_API_KEY"] = ""
os.environ["AZURE_AI_API_BASE"] = ""
```

### Example Call

```python
from litellm import completion
import os
## set ENV variables
os.environ["AZURE_AI_API_KEY"] = "azure ai key"
os.environ["AZURE_AI_API_BASE"] = "azure ai base url" # e.g.: https://Mistral-large-dfgfj-serverless.eastus2.inference.ai.azure.com/

# Azure AI Studio command-r-plus call
response = completion(
    model="azure_ai/command-r-plus",
    messages = [{ "content": "Hello, how are you?","role": "user"}]
)
```

</TabItem>
<TabItem value="proxy" label="PROXY">

1. Add models to your config.yaml

```yaml
model_list:
  - model_name: command-r-plus
    litellm_params:
      model: azure_ai/command-r-plus
      api_key: os.environ/AZURE_AI_API_KEY
      api_base: os.environ/AZURE_AI_API_BASE
```

2. Start the proxy

```bash
$ litellm --config /path/to/config.yaml --debug
```

3. Send Request to LiteLLM Proxy Server

<Tabs>

<TabItem value="openai" label="OpenAI Python v1.0.0+">

```python
import openai
client = openai.OpenAI(
    api_key="sk-1234",             # pass litellm proxy key, if you're using virtual keys
    base_url="http://0.0.0.0:4000" # litellm-proxy-base url
)

response = client.chat.completions.create(
    model="command-r-plus",
    messages = [
        {
            "role": "system",
            "content": "Be a good human!"
        },
        {
            "role": "user",
            "content": "What do you know about earth?"
        }
    ]
)

print(response)
```

</TabItem>

<TabItem value="curl" label="curl">

```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
  --header 'Authorization: Bearer sk-1234' \
  --header 'Content-Type: application/json' \
  --data '{
    "model": "command-r-plus",
    "messages": [
        {
            "role": "system",
            "content": "Be a good human!"
        },
        {
            "role": "user",
            "content": "What do you know about earth?"
        }
    ]
}'
```
</TabItem>

</Tabs>

</TabItem>

</Tabs>

## Passing additional params - max_tokens, temperature

See all litellm.completion supported params [here](../completion/input.md#translated-openai-params)

```python
# !pip install litellm
from litellm import completion
import os
## set ENV variables
os.environ["AZURE_AI_API_KEY"] = "azure ai api key"
os.environ["AZURE_AI_API_BASE"] = "azure ai api base"

# command r plus call
response = completion(
    model="azure_ai/command-r-plus",
    messages = [{ "content": "Hello, how are you?","role": "user"}],
    max_tokens=20,
    temperature=0.5
)
```

**proxy**

```yaml
model_list:
  - model_name: command-r-plus
    litellm_params:
      model: azure_ai/command-r-plus
      api_key: os.environ/AZURE_AI_API_KEY
      api_base: os.environ/AZURE_AI_API_BASE
      max_tokens: 20
      temperature: 0.5
```

2. Start the proxy

```bash
@@ -103,9 +205,6 @@ response = litellm.completion(

</Tabs>

## Function Calling

<Tabs>
@@ -115,8 +214,8 @@ response = litellm.completion(
from litellm import completion

# set env
os.environ["AZURE_AI_API_KEY"] = "your-api-key"
os.environ["AZURE_AI_API_BASE"] = "your-api-base"

tools = [
    {
@@ -141,9 +240,7 @@ tools = [
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]

response = completion(
    model="azure_ai/mistral-large-latest",
    messages=messages,
    tools=tools,
    tool_choice="auto",
@@ -206,10 +303,12 @@ curl http://0.0.0.0:4000/v1/chat/completions \

## Supported Models

LiteLLM supports **ALL** Azure AI models. Here are a few examples:

| Model Name             | Function Call                                               |
|------------------------|-------------------------------------------------------------|
| Cohere command-r-plus  | `completion(model="azure/command-r-plus", messages)`        |
| Cohere command-r       | `completion(model="azure/command-r", messages)`             |
| mistral-large-latest   | `completion(model="azure/mistral-large-latest", messages)`  |
@@ -144,16 +144,135 @@ print(response)

</TabItem>
</Tabs>

## Set temperature, top p, etc.

<Tabs>
<TabItem value="sdk" label="SDK">

```python
import os
from litellm import completion

os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""

response = completion(
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    messages=[{ "content": "Hello, how are you?","role": "user"}],
    temperature=0.7,
    top_p=1
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">

**Set on yaml**

```yaml
model_list:
  - model_name: bedrock-claude-v1
    litellm_params:
      model: bedrock/anthropic.claude-instant-v1
      temperature: <your-temp>
      top_p: <your-top-p>
```

**Set on request**

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="bedrock-claude-v1",
    messages = [
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    temperature=0.7,
    top_p=1
)

print(response)
```

</TabItem>
</Tabs>

## Pass provider-specific params

If you pass a non-openai param to litellm, we'll assume it's provider-specific and send it as a kwarg in the request body. [See more](../completion/input.md#provider-specific-params)

<Tabs>
<TabItem value="sdk" label="SDK">

```python
import os
from litellm import completion

os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""

response = completion(
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    messages=[{ "content": "Hello, how are you?","role": "user"}],
    top_k=1 # 👈 PROVIDER-SPECIFIC PARAM
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">

**Set on yaml**

```yaml
model_list:
  - model_name: bedrock-claude-v1
    litellm_params:
      model: bedrock/anthropic.claude-instant-v1
      top_k: 1 # 👈 PROVIDER-SPECIFIC PARAM
```

**Set on request**

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="bedrock-claude-v1",
    messages = [
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    temperature=0.7,
    extra_body={
        "top_k": 1 # 👈 PROVIDER-SPECIFIC PARAM
    }
)

print(response)
```

</TabItem>
</Tabs>

## Usage - Function Calling

LiteLLM uses Bedrock's Converse API for making tool calls.

```python
from litellm import completion
@@ -361,47 +480,6 @@ response = completion(
)
```

(This hunk moves the "### Passing an external BedrockRuntime.Client as a parameter - Completion()" subsection further down the page; see the hunk below.)

### SSO Login (AWS Profile)
- Set `AWS_PROFILE` environment variable
- Make bedrock completion call
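A minimal sketch of that flow, assuming you have already authenticated the profile (e.g. via `aws sso login`); the profile name, region, and model below are illustrative:

```python
import os
from litellm import completion

# point boto3/litellm at an SSO-authenticated AWS profile
os.environ["AWS_PROFILE"] = "dev-profile"      # illustrative profile name
os.environ["AWS_REGION_NAME"] = "us-east-1"    # region where the model is enabled

response = completion(
    model="bedrock/anthropic.claude-instant-v1",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
)
print(response)
```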
@@ -464,6 +542,56 @@ response = completion(
)
```

### Passing an external BedrockRuntime.Client as a parameter - Completion()

:::warning

This is a deprecated flow. Boto3 is not async, and boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)

:::

Pass an external BedrockRuntime.Client object as a parameter to litellm.completion. Useful when using an AWS credentials profile, SSO session, assumed role session, or if environment variables are not available for auth.

Create a client from session credentials:
```python
import boto3
from litellm import completion

bedrock = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-east-1",
    aws_access_key_id="",
    aws_secret_access_key="",
    aws_session_token="",
)

response = completion(
    model="bedrock/anthropic.claude-instant-v1",
    messages=[{ "content": "Hello, how are you?","role": "user"}],
    aws_bedrock_client=bedrock,
)
```

Create a client from AWS profile in `~/.aws/config`:
```python
import boto3
from litellm import completion

dev_session = boto3.Session(profile_name="dev-profile")
bedrock = dev_session.client(
    service_name="bedrock-runtime",
    region_name="us-east-1",
)

response = completion(
    model="bedrock/anthropic.claude-instant-v1",
    messages=[{ "content": "Hello, how are you?","role": "user"}],
    aws_bedrock_client=bedrock,
)
```

## Provisioned throughput models

To use provisioned throughput Bedrock models pass
- `model=bedrock/<base-model>`, example `model=bedrock/anthropic.claude-v2`. Set `model` to any of the [Supported AWS models](#supported-aws-bedrock-models)
@@ -1,10 +1,13 @@
# Clarifai
Anthropic, OpenAI, Mistral, Llama and Gemini LLMs are supported on Clarifai.

:::warning

Streaming is not yet supported when using Clarifai with litellm. Tracking support here: https://github.com/BerriAI/litellm/issues/4162

:::

## Pre-Requisites

`pip install litellm`

## Required Environment Variables

@@ -12,6 +15,7 @@ To obtain your Clarifai Personal access token follow this [link](https://docs.cl

```python
os.environ["CLARIFAI_API_KEY"] = "YOUR_CLARIFAI_PAT" # CLARIFAI_PAT
```

## Usage

@@ -68,7 +72,7 @@ Example Usage - Note: liteLLM supports all models deployed on Clarifai

| clarifai/meta.Llama-2.codeLlama-70b-Python | `completion('clarifai/meta.Llama-2.codeLlama-70b-Python', messages)`|
| clarifai/meta.Llama-2.codeLlama-70b-Instruct | `completion('clarifai/meta.Llama-2.codeLlama-70b-Instruct', messages)` |

## Mistral LLMs
| Model Name                                   | Function Call                                                          |
|----------------------------------------------|------------------------------------------------------------------------|
| clarifai/mistralai.completion.mixtral-8x22B  | `completion('clarifai/mistralai.completion.mixtral-8x22B', messages)`  |
@@ -125,11 +125,12 @@ See all litellm.completion supported params [here](../completion/input.md#transl

from litellm import completion
import os
## set ENV variables
os.environ["DATABRICKS_API_KEY"] = "databricks key"
os.environ["DATABRICKS_API_BASE"] = "databricks api base"

# databricks dbrx call
response = completion(
    model="databricks/databricks-dbrx-instruct",
    messages = [{ "content": "Hello, how are you?","role": "user"}],
    max_tokens=20,
    temperature=0.5
@@ -449,6 +449,54 @@ print(response)

</TabItem>
</Tabs>

## Usage - Function Calling

LiteLLM supports Function Calling for Vertex AI gemini models.

```python
from litellm import completion
import os
# set env
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ".."
os.environ["VERTEX_AI_PROJECT"] = ".."
os.environ["VERTEX_AI_LOCATION"] = ".."

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
]
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]

response = completion(
    model="vertex_ai/gemini-pro-vision",
    messages=messages,
    tools=tools,
)
# Add any assertions here to check response args
print(response)
assert isinstance(response.choices[0].message.tool_calls[0].function.name, str)
assert isinstance(
    response.choices[0].message.tool_calls[0].function.arguments, str
)
```

## Chat Models
| Model Name | Function Call |

@@ -500,6 +548,8 @@ All models listed [here](https://github.com/BerriAI/litellm/blob/57f37f743886a02

| Model Name | Function Call |
|--------------------------|------------------------------------------------------------------------|
| text-embedding-004 | `embedding(model="vertex_ai/text-embedding-004", input)` |
| text-multilingual-embedding-002 | `embedding(model="vertex_ai/text-multilingual-embedding-002", input)` |
| textembedding-gecko | `embedding(model="vertex_ai/textembedding-gecko", input)` |
| textembedding-gecko-multilingual | `embedding(model="vertex_ai/textembedding-gecko-multilingual", input)` |
| textembedding-gecko-multilingual@001 | `embedding(model="vertex_ai/textembedding-gecko-multilingual@001", input)` |

@@ -508,6 +558,29 @@ All models listed [here](https://github.com/BerriAI/litellm/blob/57f37f743886a02
| text-embedding-preview-0409 | `embedding(model="vertex_ai/text-embedding-preview-0409", input)` |
| text-multilingual-embedding-preview-0409 | `embedding(model="vertex_ai/text-multilingual-embedding-preview-0409", input)` |

### Advanced Use `task_type` and `title` (Vertex Specific Params)

👉 `task_type` and `title` are vertex specific params

LiteLLM Supported Vertex Specific Params:

```python
auto_truncate: Optional[bool] = None
task_type: Optional[Literal["RETRIEVAL_QUERY","RETRIEVAL_DOCUMENT", "SEMANTIC_SIMILARITY", "CLASSIFICATION", "CLUSTERING", "QUESTION_ANSWERING", "FACT_VERIFICATION"]] = None
title: Optional[str] = None # The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
```

**Example Usage with LiteLLM**
```python
response = litellm.embedding(
    model="vertex_ai/text-embedding-004",
    input=["good morning from litellm", "gm"],
    task_type = "RETRIEVAL_DOCUMENT",
    dimensions=1,
    auto_truncate=True,
)
```

## Image Generation Models

Usage
@@ -138,14 +138,22 @@ Navigate to the Usage Tab on the LiteLLM UI (found on https://your-proxy-endpoin

<Image img={require('../../img/admin_ui_spend.png')} />

## API Endpoints to get Spend
#### Getting Spend Reports - To Charge Other Teams, Customers

Use the `/global/spend/report` endpoint to get a daily spend report per
- team
- customer [this is the `user` passed to the `/chat/completions` request](#how-to-track-spend-with-litellm)

<Tabs>

<TabItem value="per team" label="Spend Per Team">

##### Example Request

👉 Key Change: Specify `group_by=team`

```shell
curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30&group_by=team' \
  -H 'Authorization: Bearer sk-1234'
```

@@ -254,6 +262,69 @@ Output from script

```

</TabItem>

</Tabs>

</TabItem>

<TabItem value="per customer" label="Spend Per Customer">

##### Example Request

👉 Key Change: Specify `group_by=customer`

```shell
curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30&group_by=customer' \
  -H 'Authorization: Bearer sk-1234'
```

##### Example Response

```shell
[
    {
        "group_by_day": "2024-04-30T00:00:00+00:00",
        "customers": [
            {
                "customer": "palantir",
                "total_spend": 0.0015265,
                "metadata": [ # see the spend by unique(key + model)
                    {
                        "model": "gpt-4",
                        "spend": 0.00123,
                        "total_tokens": 28,
                        "api_key": "88dc28.." # the hashed api key
                    },
                    {
                        "model": "gpt-4",
                        "spend": 0.00123,
                        "total_tokens": 28,
                        "api_key": "a73dc2.." # the hashed api key
                    },
                    {
                        "model": "chatgpt-v-2",
                        "spend": 0.000214,
                        "total_tokens": 122,
                        "api_key": "898c28.." # the hashed api key
                    },
                    {
                        "model": "gpt-3.5-turbo",
                        "spend": 0.0000825,
                        "total_tokens": 85,
                        "api_key": "84dc28.." # the hashed api key
                    }
                ]
            }
        ]
    }
]
```

</TabItem>

</Tabs>
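A short sketch of consuming this report programmatically, e.g. totalling spend per customer; it assumes the same illustrative proxy URL, admin key, and date range used above, and that the endpoint returns the JSON shape shown (without the inline comments):

```python
import requests

# assumes a locally running proxy and the illustrative admin key used above
resp = requests.get(
    "http://localhost:4000/global/spend/report",
    params={"start_date": "2024-04-01", "end_date": "2024-06-30", "group_by": "customer"},
    headers={"Authorization": "Bearer sk-1234"},
)
resp.raise_for_status()

# sum total_spend across days for each customer
totals = {}
for day in resp.json():
    for entry in day["customers"]:
        totals[entry["customer"]] = totals.get(entry["customer"], 0.0) + entry["total_spend"]

print(totals)  # e.g. {"palantir": 0.0015265}
```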
@@ -356,4 +427,23 @@ model_list:

## Custom Input/Output Pricing

👉 Head to [Custom Input/Output Pricing](https://docs.litellm.ai/docs/proxy/custom_pricing) to set up custom pricing for your models

## ✨ Custom k,v pairs

Log specific key,value pairs as part of the metadata for a spend log

:::info

Logging specific key,value pairs in spend logs metadata is an enterprise feature. [See here](./enterprise.md#tracking-spend-with-custom-metadata)

:::

## ✨ Custom Tags

:::info

Tracking spend with Custom tags is an enterprise feature. [See here](./enterprise.md#tracking-spend-for-custom-tags)

:::
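For orientation, a minimal sketch of how tags are typically attached to a request's `metadata` so they can be tracked; the proxy URL, key, and tag values are illustrative, and the exact supported format is described in the enterprise docs linked above:

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what llm are you"}],
    extra_body={
        "metadata": {
            "tags": ["app:chat-ui", "env:staging"]  # illustrative tag values
        }
    },
)
print(response)
```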
@@ -42,6 +42,14 @@ Set `JSON_LOGS="True"` in your env:

```bash
export JSON_LOGS="True"
```

**OR**

Set `json_logs: true` in your yaml:

```yaml
litellm_settings:
  json_logs: true
```

Start proxy

@@ -49,4 +57,35 @@ Start proxy

$ litellm
```

The proxy will now write all logs in json format.

## Control Log Output

Turn off fastapi's default 'INFO' logs

1. Turn on 'json logs'
```yaml
litellm_settings:
  json_logs: true
```

2. Set `LITELLM_LOG` to 'ERROR'

Only get logs if an error occurs.

```bash
LITELLM_LOG="ERROR"
```

3. Start proxy

```bash
$ litellm
```

Expected Output:

```bash
# no info statements
```
@@ -1,5 +1,6 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import Image from '@theme/IdealImage';

# 🐳 Docker, Deploying LiteLLM Proxy
@@ -537,7 +538,9 @@ ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml

## Advanced Deployment Settings

### 1. Customization of the server root path (custom Proxy base url)

💥 Use this when you want to serve LiteLLM on a custom base url path like `https://localhost:4000/api/v1`

:::info

@@ -548,9 +551,29 @@ In a Kubernetes deployment, it's possible to utilize a shared DNS to host multip

Customize the root path to eliminate the need for employing multiple DNS configurations during deployment.

👉 Set `SERVER_ROOT_PATH` in your .env and this will be set as your server root path
```
export SERVER_ROOT_PATH="/api/v1"
```

**Step 1. Run Proxy with `SERVER_ROOT_PATH` set in your env**

```shell
docker run --name litellm-proxy \
  -e DATABASE_URL=postgresql://<user>:<password>@<host>:<port>/<dbname> \
  -e SERVER_ROOT_PATH="/api/v1" \
  -p 4000:4000 \
  ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml
```

After running the proxy you can access it on `http://0.0.0.0:4000/api/v1/` (since we set `SERVER_ROOT_PATH="/api/v1"`)

**Step 2. Verify Running on correct path**

<Image img={require('../../img/custom_root_path.png')} />

**That's it**, that's all you need to run the proxy on a custom root path

### 2. Setting SSL Certification

Use this if you need to set ssl certificates for your on prem litellm proxy
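As a rough sketch, this is typically done by pointing the proxy CLI at your certificate and key files; the paths below are placeholders and the flag names (`--ssl_keyfile_path`, `--ssl_certfile_path`) should be verified against the current CLI help:

```shell
litellm --config /path/to/config.yaml \
  --ssl_keyfile_path /path/to/keyfile.key \
  --ssl_certfile_path /path/to/certfile.crt
```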
@@ -205,6 +205,146 @@ curl -X GET "http://0.0.0.0:4000/spend/tags" \

```

## Tracking Spend with custom metadata

Requirements:

- Virtual Keys & a database should be set up, see [virtual keys](https://docs.litellm.ai/docs/proxy/virtual_keys)

#### Usage - /chat/completions requests with special spend logs metadata

<Tabs>

<TabItem value="openai" label="OpenAI Python v1.0.0+">

Set `extra_body={"metadata": { }}` to the `metadata` you want to pass

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages = [
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "metadata": {
            "spend_logs_metadata": {
                "hello": "world"
            }
        }
    }
)

print(response)
```
</TabItem>

<TabItem value="Curl" label="Curl Request">

Pass `metadata` as part of the request body

```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
    --header 'Content-Type: application/json' \
    --data '{
    "model": "gpt-3.5-turbo",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ],
    "metadata": {
        "spend_logs_metadata": {
            "hello": "world"
        }
    }
}'
```
</TabItem>
<TabItem value="langchain" label="Langchain">

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model = "gpt-3.5-turbo",
    temperature=0.1,
    extra_body={
        "metadata": {
            "spend_logs_metadata": {
                "hello": "world"
            }
        }
    }
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that im using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)

print(response)
```

</TabItem>
</Tabs>

#### Viewing Spend w/ custom metadata

#### `/spend/logs` Request Format

```bash
curl -X GET "http://0.0.0.0:4000/spend/logs?request_id=<your-call-id>" \ # e.g.: chatcmpl-9ZKMURhVYSi9D6r6PJ9vLcayIK0Vm
  -H "Authorization: Bearer sk-1234"
```

#### `/spend/logs` Response Format

```bash
[
    {
        "request_id": "chatcmpl-9ZKMURhVYSi9D6r6PJ9vLcayIK0Vm",
        "call_type": "acompletion",
        "metadata": {
            "user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
            "user_api_key_alias": null,
            "spend_logs_metadata": { # 👈 LOGGED CUSTOM METADATA
                "hello": "world"
            },
            "user_api_key_team_id": null,
            "user_api_key_user_id": "116544810872468347480",
            "user_api_key_team_alias": null
        },
    }
]
```

## Enforce Required Params for LLM Requests

Use this when you want to enforce that all requests include certain params, e.g. you need every request to include the `user` and `["metadata"]["generation_name"]` params.
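A minimal config sketch of what such enforcement can look like, assuming the `enforced_params` key under `general_settings` (check the current proxy docs for the exact setting name):

```yaml
general_settings:
  master_key: sk-1234
  enforced_params:              # requests missing any of these params are rejected
    - user
    - metadata.generation_name
```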
@@ -606,6 +606,52 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \

** 🎉 Expect to see this trace logged in your OTEL collector**

### Context propagation across Services `Traceparent HTTP Header`

❓ Use this when you want to **pass information about the incoming request in a distributed tracing system**

✅ Key Change: Pass the **`traceparent` header** in your requests. [Read more about traceparent headers here](https://uptrace.dev/opentelemetry/opentelemetry-traceparent.html#what-is-traceparent-header)
```curl
traceparent: 00-80e1afed08e019fc1110464cfa66635c-7a085853722dc6d2-01
```
Example Usage

1. Make Request to LiteLLM Proxy with `traceparent` header
```python
import openai
import uuid

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
example_traceparent = f"00-80e1afed08e019fc1110464cfa66635c-02e80198930058d4-01"
extra_headers = {
    "traceparent": example_traceparent
}
_trace_id = example_traceparent.split("-")[1]

print("EXTRA HEADERS: ", extra_headers)
print("Trace ID: ", _trace_id)

response = client.chat.completions.create(
    model="llama3",
    messages=[
        {"role": "user", "content": "this is a test request, write a short poem"}
    ],
    extra_headers=extra_headers,
)

print(response)
```

```shell
# EXTRA HEADERS: {'traceparent': '00-80e1afed08e019fc1110464cfa66635c-02e80198930058d4-01'}
# Trace ID: 80e1afed08e019fc1110464cfa66635c
```

2. Lookup Trace ID on OTEL Logger

Search for Trace=`80e1afed08e019fc1110464cfa66635c` on your OTEL Collector

<Image img={require('../../img/otel_parent.png')} />
@@ -21,6 +21,7 @@ general_settings:

litellm_settings:
  set_verbose: False # Switch off Debug Logging, ensure your logs do not have any debugging on
  json_logs: true # Get debug logs in json format
```

Set slack webhook url in your env

@@ -28,6 +29,11 @@ Set slack webhook url in your env
export SLACK_WEBHOOK_URL="https://hooks.slack.com/services/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"
```

Turn off FASTAPI's default info logs

```bash
export LITELLM_LOG="ERROR"
```

:::info

Need help or want dedicated support? Talk to a founder [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
@ -2,18 +2,13 @@ import Image from '@theme/IdealImage';
|
||||||
import Tabs from '@theme/Tabs';
|
import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# 🔥 Fallbacks, Retries, Timeouts, Load Balancing
|
# 🔥 Load Balancing, Fallbacks, Retries, Timeouts
|
||||||
|
|
||||||
Retry call with multiple instances of the same model.
|
- Quick Start [load balancing](#test---load-balancing)
|
||||||
|
- Quick Start [client side fallbacks](#test---client-side-fallbacks)
|
||||||
If a call fails after num_retries, fall back to another model group.
|
|
||||||
|
|
||||||
If the error is a context window exceeded error, fall back to a larger model group (if given).
|
|
||||||
|
|
||||||
[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/router.py)
|
|
||||||
|
|
||||||
## Quick Start - Load Balancing
|
## Quick Start - Load Balancing
|
||||||
### Step 1 - Set deployments on config
|
#### Step 1 - Set deployments on config
|
||||||
|
|
||||||
**Example config below**. Here requests with `model=gpt-3.5-turbo` will be routed across multiple instances of `azure/gpt-3.5-turbo`
|
**Example config below**. Here requests with `model=gpt-3.5-turbo` will be routed across multiple instances of `azure/gpt-3.5-turbo`
|
||||||
```yaml
|
```yaml
|
||||||
|
@ -38,50 +33,214 @@ model_list:
|
||||||
rpm: 1440
|
rpm: 1440
|
||||||
```
|
```
|
||||||
|
|
||||||
### Step 2: Start Proxy with config
|
#### Step 2: Start Proxy with config
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
$ litellm --config /path/to/config.yaml
|
$ litellm --config /path/to/config.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
### Step 3: Use proxy - Call a model group [Load Balancing]
|
### Test - Load Balancing
|
||||||
Curl Command
|
|
||||||
|
Here requests with model=gpt-3.5-turbo will be routed across multiple instances of azure/gpt-3.5-turbo
|
||||||
|
|
||||||
|
👉 Key Change: `model="gpt-3.5-turbo"`
|
||||||
|
|
||||||
|
**Check the `model_id` in Response Headers to make sure the requests are being load balanced**
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
|
||||||
|
<TabItem value="openai" label="OpenAI Python v1.0.0+">
|
||||||
|
|
||||||
|
```python
|
||||||
|
import openai
|
||||||
|
client = openai.OpenAI(
|
||||||
|
api_key="anything",
|
||||||
|
base_url="http://0.0.0.0:4000"
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "this is a test request, write a short poem"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="Curl" label="Curl Request">
|
||||||
|
|
||||||
|
Pass `metadata` as part of the request body
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||||
--header 'Content-Type: application/json' \
|
--header 'Content-Type: application/json' \
|
||||||
--data ' {
|
--data '{
|
||||||
"model": "gpt-3.5-turbo",
|
"model": "gpt-3.5-turbo",
|
||||||
"messages": [
|
"messages": [
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": "what llm are you"
|
"content": "what llm are you"
|
||||||
}
|
}
|
||||||
],
|
]
|
||||||
}
|
}'
|
||||||
'
|
```
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="langchain" label="Langchain">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from langchain.chat_models import ChatOpenAI
|
||||||
|
from langchain.prompts.chat import (
|
||||||
|
ChatPromptTemplate,
|
||||||
|
HumanMessagePromptTemplate,
|
||||||
|
SystemMessagePromptTemplate,
|
||||||
|
)
|
||||||
|
from langchain.schema import HumanMessage, SystemMessage
|
||||||
|
import os
|
||||||
|
|
||||||
|
os.environ["OPENAI_API_KEY"] = "anything"
|
||||||
|
|
||||||
|
chat = ChatOpenAI(
|
||||||
|
openai_api_base="http://0.0.0.0:4000",
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
)
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
SystemMessage(
|
||||||
|
content="You are a helpful assistant that im using to make a test request to."
|
||||||
|
),
|
||||||
|
HumanMessage(
|
||||||
|
content="test from litellm. tell me why it's amazing in 1 sentence"
|
||||||
|
),
|
||||||
|
]
|
||||||
|
response = chat(messages)
|
||||||
|
|
||||||
|
print(response)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Usage - Call a specific model deployment
|
</TabItem>
|
||||||
If you want to call a specific model defined in the `config.yaml`, you can call the `litellm_params: model`
|
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
### Test - Client Side Fallbacks
|
||||||
|
In this request the following will occur:
|
||||||
|
1. The request to `model="zephyr-beta"` will fail
|
||||||
|
2. litellm proxy will loop through all the model_groups specified in `fallbacks=["gpt-3.5-turbo"]`
|
||||||
|
3. The request to `model="gpt-3.5-turbo"` will succeed and the client making the request will get a response from gpt-3.5-turbo
|
||||||
|
|
||||||
|
👉 Key Change: `"fallbacks": ["gpt-3.5-turbo"]`
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
|
||||||
|
<TabItem value="openai" label="OpenAI Python v1.0.0+">
|
||||||
|
|
||||||
|
```python
|
||||||
|
import openai
|
||||||
|
client = openai.OpenAI(
|
||||||
|
api_key="anything",
|
||||||
|
base_url="http://0.0.0.0:4000"
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="zephyr-beta",
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "this is a test request, write a short poem"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
extra_body={
|
||||||
|
"fallbacks": ["gpt-3.5-turbo"]
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="Curl" label="Curl Request">
|
||||||
|
|
||||||
|
Pass `metadata` as part of the request body
|
||||||
|
|
||||||
|
```shell
|
||||||
|
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--data '{
|
||||||
|
"model": "zephyr-beta"",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "what llm are you"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"fallbacks": ["gpt-3.5-turbo"]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||

<TabItem value="langchain" label="Langchain">

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
import os

os.environ["OPENAI_API_KEY"] = "anything"

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model="zephyr-beta",
    extra_body={
        "fallbacks": ["gpt-3.5-turbo"]
    }
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that im using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)

print(response)
```

</TabItem>

</Tabs>

<!--
### Test it!

In this example it will call `azure/gpt-turbo-small-ca`. Defined in the config on Step 1

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data-raw '{
    "model": "zephyr-beta", # 👈 MODEL NAME to fallback from
    "messages": [
        {"role": "user", "content": "what color is red"}
    ],
    "mock_testing_fallbacks": true
}'
``` -->

## Advanced

### Fallbacks + Retries + Timeouts + Cooldowns

**Set via config**

@@ -114,44 +273,7 @@
```yaml
litellm_settings:
  context_window_fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}] # fallback to gpt-3.5-turbo-16k if context window error
  allowed_fails: 3 # cooldown model if it fails > 1 call in a minute.
```
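
The config snippet above is truncated in this diff. A rough sketch of the same retry / fallback / cooldown behaviour on the Python `Router` (the model names and values below are illustrative assumptions, not taken from this PR):

```python
from litellm import Router

router = Router(
    model_list=[
        {"model_name": "zephyr-beta", "litellm_params": {"model": "huggingface/HuggingFaceH4/zephyr-7b-beta"}},
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}},
        {"model_name": "gpt-3.5-turbo-16k", "litellm_params": {"model": "gpt-3.5-turbo-16k"}},
    ],
    fallbacks=[{"zephyr-beta": ["gpt-3.5-turbo"]}],
    context_window_fallbacks=[{"zephyr-beta": ["gpt-3.5-turbo-16k"]}],
    num_retries=2,
    timeout=10,       # seconds, per call
    allowed_fails=3,  # cooldown a deployment after repeated failures within a minute
)

response = router.completion(
    model="zephyr-beta",
    messages=[{"role": "user", "content": "what llm are you"}],
)
print(response)
```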

**Set dynamically**

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "zephyr-beta",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ],
    "fallbacks": [{"zephyr-beta": ["gpt-3.5-turbo"]}],
    "context_window_fallbacks": [{"zephyr-beta": ["gpt-3.5-turbo"]}],
    "num_retries": 2,
    "timeout": 10
}'
```

### Test it!

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data-raw '{
    "model": "zephyr-beta", # 👈 MODEL NAME to fallback from
    "messages": [
        {"role": "user", "content": "what color is red"}
    ],
    "mock_testing_fallbacks": true
}'
```

### Context Window Fallbacks (Pre-Call Checks + Fallbacks)

**Before call is made** check if a call is within model context window with **`enable_pre_call_checks: true`**.
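
A rough sketch of the same pre-call check on the Python `Router` (deployment names, keys, and the `base_model` hint below are illustrative assumptions):

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",  # two deployments in the same group
            "litellm_params": {"model": "azure/chatgpt-v-2", "api_key": "...", "api_base": "..."},
            "model_info": {"base_model": "azure/gpt-35-turbo"},  # assumed: used to look up the context window
        },
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo-1106"},  # larger context window
        },
    ],
    enable_pre_call_checks=True,  # 👈 deployments that cannot fit the prompt are filtered out
)
```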

@@ -287,7 +409,7 @@ print(response)

</Tabs>

### EU-Region Filtering (Pre-Call Checks)

**Before call is made** check if a call is within model context window with **`enable_pre_call_checks: true`**.
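
A rough sketch of the same idea on the Python `Router`; the deployment entry and its `region_name` value are illustrative assumptions:

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "azure/gpt-35-turbo-eu",  # assumed EU-hosted deployment
                "api_base": "https://my-endpoint-europe.openai.azure.com/",
                "api_key": "...",
                "region_name": "eu",  # assumed: marks the deployment's region
            },
        },
    ],
    enable_pre_call_checks=True,  # 👈 requests are only routed to matching-region deployments
)
```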
@@ -350,7 +472,7 @@ print(response)
print(f"response.headers.get('x-litellm-model-api-base')")

### Custom Timeouts, Stream Timeouts - Per Model

For each model you can set `timeout` & `stream_timeout` under `litellm_params`

```yaml
model_list:
```

@@ -379,7 +501,7 @@ $ litellm --config /path/to/config.yaml
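
The yaml above is truncated in this diff. A rough sketch of the equivalent per-model settings on the Python `Router` (model names and values are illustrative assumptions):

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "timeout": 300,        # total request timeout, seconds
                "stream_timeout": 60,  # timeout used for streaming requests, seconds
            },
        },
    ]
)
```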

### Setting Dynamic Timeouts - Per Request

LiteLLM Proxy supports setting a `timeout` per request
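
For example (a sketch, assuming the proxy reads `timeout` from the request body as described here):

```python
import openai

client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what llm are you"}],
    extra_body={"timeout": 1},  # 👈 per-request timeout, in seconds
)
print(response)
```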
@@ -77,6 +77,28 @@ litellm_settings:

#### Step 2: Setup Oauth Client
<Tabs>

<TabItem value="okta" label="Okta SSO">

1. Add Okta credentials to your .env

```bash
GENERIC_CLIENT_ID = "<your-okta-client-id>"
GENERIC_CLIENT_SECRET = "<your-okta-client-secret>"
GENERIC_AUTHORIZATION_ENDPOINT = "<your-okta-domain>/authorize" # https://dev-2kqkcd6lx6kdkuzt.us.auth0.com/authorize
GENERIC_TOKEN_ENDPOINT = "<your-okta-domain>/token" # https://dev-2kqkcd6lx6kdkuzt.us.auth0.com/oauth/token
GENERIC_USERINFO_ENDPOINT = "<your-okta-domain>/userinfo" # https://dev-2kqkcd6lx6kdkuzt.us.auth0.com/userinfo
```

You can get your domain-specific auth/token/userinfo endpoints at `<YOUR-OKTA-DOMAIN>/.well-known/openid-configuration`

2. Add proxy url as callback_url on Okta

On Okta, add the 'callback_url' as `<proxy_base_url>/sso/callback`

<Image img={require('../../img/okta_callback_url.png')} />

</TabItem>
<TabItem value="google" label="Google SSO">

- Create a new Oauth 2.0 Client on https://console.cloud.google.com/

@@ -115,7 +137,6 @@ MICROSOFT_TENANT="5a39737"

</TabItem>

<TabItem value="Generic" label="Generic SSO Provider">

A generic OAuth client that can be used to quickly create support for any OAuth provider with close to no code
@@ -63,7 +63,7 @@ You can:

- Add budgets to Teams

#### **Add budgets to teams**

```shell
curl --location 'http://localhost:4000/team/new' \
--header 'Authorization: Bearer <your-master-key>' \
```

@@ -102,6 +102,22 @@ curl --location 'http://localhost:4000/team/new' \

```
    "budget_reset_at": null
}
```

#### **Add budget duration to teams**

`budget_duration`: Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").

```
curl 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
  "team_alias": "my-new-team_4",
  "members_with_roles": [{"role": "admin", "user_id": "5c4a0aa3-a1e1-43dc-bd87-3c2da8382a3a"}],
  "budget_duration": "10s"
}'
```

</TabItem>
<TabItem value="per-team-member" label="For Team Members">
BIN  docs/my-website/img/custom_root_path.png  (new file, 151 KiB, binary file not shown)
BIN  docs/my-website/img/okta_callback_url.png  (new file, 279 KiB, binary file not shown)
BIN  docs/my-website/img/otel_parent.png  (new file, 200 KiB, binary file not shown)
BIN  docs/my-website/img/raw_request_log.png  (new file, 168 KiB, binary file not shown)
@@ -183,6 +183,7 @@ const sidebars = {
        label: "Logging & Observability",
        items: [
          "debugging/local_debugging",
          "observability/raw_request_response",
          "observability/callbacks",
          "observability/custom_callback",
          "observability/langfuse_integration",
@@ -256,6 +257,7 @@ const sidebars = {
          "projects/GPT Migrate",
          "projects/YiVal",
          "projects/LiteLLM Proxy",
          "projects/llm_cord",
        ],
      },
  ],
@@ -60,6 +60,7 @@ _async_failure_callback: List[Callable] = (
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False
log_raw_request_response: bool = False
redact_messages_in_exceptions: Optional[bool] = False
store_audit_logs = False  # Enterprise feature, allow users to see audit logs
## end of callbacks #############
@@ -407,6 +408,7 @@ openai_compatible_providers: List = [
    "together_ai",
    "fireworks_ai",
    "friendliai",
    "azure_ai",
]
@@ -611,6 +613,7 @@ provider_list: List = [
    "baseten",
    "azure",
    "azure_text",
    "azure_ai",
    "sagemaker",
    "bedrock",
    "vllm",
@@ -765,7 +768,7 @@ from .llms.gemini import GeminiConfig
from .llms.nlp_cloud import NLPCloudConfig
from .llms.aleph_alpha import AlephAlphaConfig
from .llms.petals import PetalsConfig
from .llms.vertex_ai import VertexAIConfig, VertexAITextEmbeddingConfig
from .llms.vertex_ai_anthropic import VertexAIAnthropicConfig
from .llms.sagemaker import SagemakerConfig
from .llms.ollama import OllamaConfig
@@ -787,6 +790,7 @@ from .llms.openai import (
    OpenAIConfig,
    OpenAITextCompletionConfig,
    MistralConfig,
    MistralEmbeddingConfig,
    DeepInfraConfig,
)
from .llms.azure import (
@@ -337,8 +337,6 @@ def response_cost_calculator(
        and custom_llm_provider is True
    ):  # override defaults if custom pricing is set
        base_model = model
    # base_model defaults to None if not set on model_info
    response_cost = completion_cost(
        completion_response=response_object,
@@ -337,6 +337,7 @@ class ContextWindowExceededError(BadRequestError):  # type: ignore
            model=self.model,  # type: ignore
            llm_provider=self.llm_provider,  # type: ignore
            response=response,
            litellm_debug_info=self.litellm_debug_info,
        )  # Call the base class constructor with the parameters it needs

    def __str__(self):
@@ -379,6 +380,7 @@ class RejectedRequestError(BadRequestError):  # type: ignore
            model=self.model,  # type: ignore
            llm_provider=self.llm_provider,  # type: ignore
            response=response,
            litellm_debug_info=self.litellm_debug_info,
        )  # Call the base class constructor with the parameters it needs

    def __str__(self):
@@ -418,6 +420,7 @@ class ContentPolicyViolationError(BadRequestError):  # type: ignore
            model=self.model,  # type: ignore
            llm_provider=self.llm_provider,  # type: ignore
            response=response,
            litellm_debug_info=self.litellm_debug_info,
        )  # Call the base class constructor with the parameters it needs

    def __str__(self):
@@ -6,17 +6,23 @@ import litellm
from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_logger
from litellm.types.services import ServiceLoggerPayload
from functools import wraps
from typing import Union, Optional, TYPE_CHECKING, Any

if TYPE_CHECKING:
    from opentelemetry.trace import Span as _Span
    from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth
    from litellm.proxy._types import (
        ManagementEndpointLoggingPayload as _ManagementEndpointLoggingPayload,
    )

    Span = _Span
    UserAPIKeyAuth = _UserAPIKeyAuth
    ManagementEndpointLoggingPayload = _ManagementEndpointLoggingPayload
else:
    Span = Any
    UserAPIKeyAuth = Any
    ManagementEndpointLoggingPayload = Any


LITELLM_TRACER_NAME = os.getenv("OTEL_TRACER_NAME", "litellm")
@@ -247,7 +253,7 @@ class OpenTelemetry(CustomLogger):
        span.end(end_time=self._to_ns(end_time))

    def set_tools_attributes(self, span: Span, tools):
        from litellm.proxy._types import SpanAttributes
        import json

        if not tools:
@@ -272,7 +278,7 @@ class OpenTelemetry(CustomLogger):
            pass

    def set_attributes(self, span: Span, kwargs, response_obj):
        from litellm.proxy._types import SpanAttributes

        optional_params = kwargs.get("optional_params", {})
        litellm_params = kwargs.get("litellm_params", {}) or {}
@@ -407,7 +413,7 @@ class OpenTelemetry(CustomLogger):
            )

    def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
        from litellm.proxy._types import SpanAttributes

        optional_params = kwargs.get("optional_params", {})
        litellm_params = kwargs.get("litellm_params", {}) or {}
@@ -454,6 +460,23 @@ class OpenTelemetry(CustomLogger):
    def _get_span_name(self, kwargs):
        return LITELLM_REQUEST_SPAN_NAME

    def get_traceparent_from_header(self, headers):
        if headers is None:
            return None
        _traceparent = headers.get("traceparent", None)
        if _traceparent is None:
            return None

        from opentelemetry.trace.propagation.tracecontext import (
            TraceContextTextMapPropagator,
        )

        verbose_logger.debug("OpenTelemetry: GOT A TRACEPARENT {}".format(_traceparent))
        propagator = TraceContextTextMapPropagator()
        _parent_context = propagator.extract(carrier={"traceparent": _traceparent})
        verbose_logger.debug("OpenTelemetry: PARENT CONTEXT {}".format(_parent_context))
        return _parent_context

    def _get_span_context(self, kwargs):
        from opentelemetry.trace.propagation.tracecontext import (
            TraceContextTextMapPropagator,
        )
@@ -545,3 +568,91 @@ class OpenTelemetry(CustomLogger):
            self.OTEL_EXPORTER,
        )
        return BatchSpanProcessor(ConsoleSpanExporter())

    async def async_management_endpoint_success_hook(
        self,
        logging_payload: ManagementEndpointLoggingPayload,
        parent_otel_span: Optional[Span] = None,
    ):
        from opentelemetry import trace
        from datetime import datetime
        from opentelemetry.trace import Status, StatusCode

        _start_time_ns = logging_payload.start_time
        _end_time_ns = logging_payload.end_time

        start_time = logging_payload.start_time
        end_time = logging_payload.end_time

        if isinstance(start_time, float):
            _start_time_ns = int(int(start_time) * 1e9)
        else:
            _start_time_ns = self._to_ns(start_time)

        if isinstance(end_time, float):
            _end_time_ns = int(int(end_time) * 1e9)
        else:
            _end_time_ns = self._to_ns(end_time)

        if parent_otel_span is not None:
            _span_name = logging_payload.route
            management_endpoint_span = self.tracer.start_span(
                name=_span_name,
                context=trace.set_span_in_context(parent_otel_span),
                start_time=_start_time_ns,
            )

            _request_data = logging_payload.request_data
            if _request_data is not None:
                for key, value in _request_data.items():
                    management_endpoint_span.set_attribute(f"request.{key}", value)

            _response = logging_payload.response
            if _response is not None:
                for key, value in _response.items():
                    management_endpoint_span.set_attribute(f"response.{key}", value)
            management_endpoint_span.set_status(Status(StatusCode.OK))
            management_endpoint_span.end(end_time=_end_time_ns)

    async def async_management_endpoint_failure_hook(
        self,
        logging_payload: ManagementEndpointLoggingPayload,
        parent_otel_span: Optional[Span] = None,
    ):
        from opentelemetry import trace
        from datetime import datetime
        from opentelemetry.trace import Status, StatusCode

        _start_time_ns = logging_payload.start_time
        _end_time_ns = logging_payload.end_time

        start_time = logging_payload.start_time
        end_time = logging_payload.end_time

        if isinstance(start_time, float):
            _start_time_ns = int(int(start_time) * 1e9)
        else:
            _start_time_ns = self._to_ns(start_time)

        if isinstance(end_time, float):
            _end_time_ns = int(int(end_time) * 1e9)
        else:
            _end_time_ns = self._to_ns(end_time)

        if parent_otel_span is not None:
            _span_name = logging_payload.route
            management_endpoint_span = self.tracer.start_span(
                name=_span_name,
                context=trace.set_span_in_context(parent_otel_span),
                start_time=_start_time_ns,
            )

            _request_data = logging_payload.request_data
            if _request_data is not None:
                for key, value in _request_data.items():
                    management_endpoint_span.set_attribute(f"request.{key}", value)

            _exception = logging_payload.exception
            management_endpoint_span.set_attribute("exception", str(_exception))
            management_endpoint_span.set_status(Status(StatusCode.ERROR))
            management_endpoint_span.end(end_time=_end_time_ns)
@@ -36,6 +36,9 @@ from ..types.llms.openai import (
    AsyncAssistantStreamManager,
    AssistantStreamManager,
)
from litellm.caching import DualCache

azure_ad_cache = DualCache()


class AzureOpenAIError(Exception):
@@ -309,9 +312,10 @@ def select_azure_base_url_or_endpoint(azure_client_params: dict):


def get_azure_ad_token_from_oidc(azure_ad_token: str):
    azure_client_id = os.getenv("AZURE_CLIENT_ID", None)
    azure_tenant_id = os.getenv("AZURE_TENANT_ID", None)
    azure_authority_host = os.getenv("AZURE_AUTHORITY_HOST", "https://login.microsoftonline.com")

    if azure_client_id is None or azure_tenant_id is None:
        raise AzureOpenAIError(
            status_code=422,
            message="AZURE_CLIENT_ID and AZURE_TENANT_ID must be set",
@@ -325,8 +329,19 @@ def get_azure_ad_token_from_oidc(azure_ad_token: str):
            message="OIDC token could not be retrieved from secret manager.",
        )

    azure_ad_token_cache_key = json.dumps({
        "azure_client_id": azure_client_id,
        "azure_tenant_id": azure_tenant_id,
        "azure_authority_host": azure_authority_host,
        "oidc_token": oidc_token,
    })

    azure_ad_token_access_token = azure_ad_cache.get_cache(azure_ad_token_cache_key)
    if azure_ad_token_access_token is not None:
        return azure_ad_token_access_token

    req_token = httpx.post(
        f"{azure_authority_host}/{azure_tenant_id}/oauth2/v2.0/token",
        data={
            "client_id": azure_client_id,
            "grant_type": "client_credentials",
@@ -342,12 +357,23 @@ def get_azure_ad_token_from_oidc(azure_ad_token: str):
            message=req_token.text,
        )

    azure_ad_token_json = req_token.json()
    azure_ad_token_access_token = azure_ad_token_json.get("access_token", None)
    azure_ad_token_expires_in = azure_ad_token_json.get("expires_in", None)

    if azure_ad_token_access_token is None:
        raise AzureOpenAIError(
            status_code=422, message="Azure AD Token access_token not returned"
        )

    if azure_ad_token_expires_in is None:
        raise AzureOpenAIError(
            status_code=422, message="Azure AD Token expires_in not returned"
        )

    azure_ad_cache.set_cache(key=azure_ad_token_cache_key, value=azure_ad_token_access_token, ttl=azure_ad_token_expires_in)

    return azure_ad_token_access_token


class AzureChatCompletion(BaseLLM):
@@ -51,8 +51,11 @@ from litellm.types.llms.openai import (
    ChatCompletionResponseMessage,
    ChatCompletionToolCallChunk,
    ChatCompletionToolCallFunctionChunk,
    ChatCompletionDeltaChunk,
)
from litellm.caching import DualCache

iam_cache = DualCache()


class AmazonCohereChatConfig:
    """
@@ -324,38 +327,53 @@ class BedrockLLM(BaseLLM):
        ) = params_to_check

        ### CHECK STS ###
        if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
            iam_creds_cache_key = json.dumps({
                "aws_web_identity_token": aws_web_identity_token,
                "aws_role_name": aws_role_name,
                "aws_session_name": aws_session_name,
                "aws_region_name": aws_region_name,
            })

            iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
            if iam_creds_dict is None:
                oidc_token = get_secret(aws_web_identity_token)

                if oidc_token is None:
                    raise BedrockError(
                        message="OIDC token could not be retrieved from secret manager.",
                        status_code=401,
                    )

                sts_client = boto3.client(
                    "sts",
                    region_name=aws_region_name,
                    endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com"
                )

                # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
                # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
                sts_response = sts_client.assume_role_with_web_identity(
                    RoleArn=aws_role_name,
                    RoleSessionName=aws_session_name,
                    WebIdentityToken=oidc_token,
                    DurationSeconds=3600,
                )

                iam_creds_dict = {
                    "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
                    "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
                    "aws_session_token": sts_response["Credentials"]["SessionToken"],
                    "region_name": aws_region_name,
                }

                iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)

            session = boto3.Session(**iam_creds_dict)

            iam_creds = session.get_credentials()

            return iam_creds
        elif aws_role_name is not None and aws_session_name is not None:
            sts_client = boto3.client(
                "sts",
@@ -1415,38 +1433,53 @@ class BedrockConverseLLM(BaseLLM):
        ) = params_to_check

        ### CHECK STS ###
        if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
            iam_creds_cache_key = json.dumps({
                "aws_web_identity_token": aws_web_identity_token,
                "aws_role_name": aws_role_name,
                "aws_session_name": aws_session_name,
                "aws_region_name": aws_region_name,
            })

            iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
            if iam_creds_dict is None:
                oidc_token = get_secret(aws_web_identity_token)

                if oidc_token is None:
                    raise BedrockError(
                        message="OIDC token could not be retrieved from secret manager.",
                        status_code=401,
                    )

                sts_client = boto3.client(
                    "sts",
                    region_name=aws_region_name,
                    endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com"
                )

                # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
                # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
                sts_response = sts_client.assume_role_with_web_identity(
                    RoleArn=aws_role_name,
                    RoleSessionName=aws_session_name,
                    WebIdentityToken=oidc_token,
                    DurationSeconds=3600,
                )

                iam_creds_dict = {
                    "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
                    "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
                    "aws_session_token": sts_response["Credentials"]["SessionToken"],
                    "region_name": aws_region_name,
                }

                iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)

            session = boto3.Session(**iam_creds_dict)

            iam_creds = session.get_credentials()

            return iam_creds
        elif aws_role_name is not None and aws_session_name is not None:
            sts_client = boto3.client(
                "sts",
@@ -1859,29 +1892,59 @@ class AWSEventStreamDecoder:
        self.parser = EventStreamJSONParser()

    def converse_chunk_parser(self, chunk_data: dict) -> GenericStreamingChunk:
        try:
            text = ""
            tool_use: Optional[ChatCompletionToolCallChunk] = None
            is_finished = False
            finish_reason = ""
            usage: Optional[ConverseTokenUsageBlock] = None

            index = int(chunk_data.get("contentBlockIndex", 0))
            if "start" in chunk_data:
                start_obj = ContentBlockStartEvent(**chunk_data["start"])
                if (
                    start_obj is not None
                    and "toolUse" in start_obj
                    and start_obj["toolUse"] is not None
                ):
                    tool_use = {
                        "id": start_obj["toolUse"]["toolUseId"],
                        "type": "function",
                        "function": {
                            "name": start_obj["toolUse"]["name"],
                            "arguments": "",
                        },
                    }
            elif "delta" in chunk_data:
                delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
                if "text" in delta_obj:
                    text = delta_obj["text"]
                elif "toolUse" in delta_obj:
                    tool_use = {
                        "id": None,
                        "type": "function",
                        "function": {
                            "name": None,
                            "arguments": delta_obj["toolUse"]["input"],
                        },
                    }
            elif "stopReason" in chunk_data:
                finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
                is_finished = True
            elif "usage" in chunk_data:
                usage = ConverseTokenUsageBlock(**chunk_data["usage"])  # type: ignore

            response = GenericStreamingChunk(
                text=text,
                tool_use=tool_use,
                is_finished=is_finished,
                finish_reason=finish_reason,
                usage=usage,
                index=index,
            )
            return response
        except Exception as e:
            raise Exception("Received streaming error - {}".format(str(e)))

    def _chunk_parser(self, chunk_data: dict) -> GenericStreamingChunk:
        text = ""
@@ -1890,12 +1953,16 @@ class AWSEventStreamDecoder:
        if "outputText" in chunk_data:
            text = chunk_data["outputText"]
        # ai21 mapping
        elif "ai21" in self.model:  # fake ai21 streaming
            text = chunk_data.get("completions")[0].get("data").get("text")  # type: ignore
            is_finished = True
            finish_reason = "stop"
        ######## bedrock.anthropic mappings ###############
        elif (
            "contentBlockIndex" in chunk_data
            or "stopReason" in chunk_data
            or "metrics" in chunk_data
        ):
            return self.converse_chunk_parser(chunk_data=chunk_data)
        ######## bedrock.mistral mappings ###############
        elif "outputs" in chunk_data:
@@ -1905,7 +1972,7 @@ class AWSEventStreamDecoder:
        ):
            text = chunk_data["outputs"][0]["text"]
            stop_reason = chunk_data.get("stop_reason", None)
            if stop_reason is not None:
                is_finished = True
                finish_reason = stop_reason
        ######## bedrock.cohere mappings ###############
@@ -1926,8 +1993,9 @@ class AWSEventStreamDecoder:
            text=text,
            is_finished=is_finished,
            finish_reason=finish_reason,
            usage=None,
            index=0,
            tool_use=None,
        )

    def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[GenericStreamingChunk]:
@@ -139,6 +139,7 @@ def process_response(


def convert_model_to_url(model: str, api_base: str):
    user_id, app_id, model_id = model.split(".")
    model_id = model_id.lower()
    return f"{api_base}/users/{user_id}/apps/{app_id}/models/{model_id}/outputs"


@@ -171,19 +172,55 @@ async def async_completion(

    async_handler = AsyncHTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
    response = await async_handler.post(
        url=model, headers=headers, data=json.dumps(data)
    )

    logging_obj.post_call(
        input=prompt,
        api_key=api_key,
        original_response=response.text,
        additional_args={"complete_input_dict": data},
    )
    ## RESPONSE OBJECT
    try:
        completion_response = response.json()
    except Exception:
        raise ClarifaiError(
            message=response.text, status_code=response.status_code, url=model
        )
    # print(completion_response)
    try:
        choices_list = []
        for idx, item in enumerate(completion_response["outputs"]):
            if len(item["data"]["text"]["raw"]) > 0:
                message_obj = Message(content=item["data"]["text"]["raw"])
            else:
                message_obj = Message(content=None)
            choice_obj = Choices(
                finish_reason="stop",
                index=idx + 1,  # check
                message=message_obj,
            )
            choices_list.append(choice_obj)
        model_response["choices"] = choices_list

    except Exception as e:
        raise ClarifaiError(
            message=traceback.format_exc(), status_code=response.status_code, url=model
        )

    # Calculate Usage
    prompt_tokens = len(encoding.encode(prompt))
    completion_tokens = len(
        encoding.encode(model_response["choices"][0]["message"].get("content"))
    )
    model_response["model"] = model
    model_response["usage"] = Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    return model_response


def completion(
@@ -241,7 +278,7 @@ def completion(
        additional_args={
            "complete_input_dict": data,
            "headers": headers,
            "api_base": model,
        },
    )
    if acompletion == True:
@@ -164,6 +164,49 @@ class MistralConfig:
        return optional_params


class MistralEmbeddingConfig:
    """
    Reference: https://docs.mistral.ai/api/#operation/createEmbedding
    """

    def __init__(
        self,
    ) -> None:
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def get_supported_openai_params(self):
        return [
            "encoding_format",
        ]

    def map_openai_params(self, non_default_params: dict, optional_params: dict):
        for param, value in non_default_params.items():
            if param == "encoding_format":
                optional_params["encoding_format"] = value
        return optional_params


class DeepInfraConfig:
    """
    Reference: https://deepinfra.com/docs/advanced/openai_api
litellm/llms/tokenizers/fb374d419588a4632f3f557e76b4b70aebbca790  (new file, 199,998 lines; file diff suppressed because it is too large)
@ -4,6 +4,7 @@ from enum import Enum
|
||||||
import requests # type: ignore
|
import requests # type: ignore
|
||||||
import time
|
import time
|
||||||
from typing import Callable, Optional, Union, List, Literal, Any
|
from typing import Callable, Optional, Union, List, Literal, Any
|
||||||
|
from pydantic import BaseModel
|
||||||
from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
|
from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
|
||||||
import litellm, uuid
|
import litellm, uuid
|
||||||
import httpx, inspect # type: ignore
|
import httpx, inspect # type: ignore
|
||||||
|
@ -12,7 +13,12 @@ from litellm.llms.prompt_templates.factory import (
|
||||||
convert_to_gemini_tool_call_result,
|
convert_to_gemini_tool_call_result,
|
||||||
convert_to_gemini_tool_call_invoke,
|
convert_to_gemini_tool_call_invoke,
|
||||||
)
|
)
|
||||||
from litellm.types.files import get_file_mime_type_for_file_type, get_file_type_from_extension, is_gemini_1_5_accepted_file_type, is_video_file_type
|
from litellm.types.files import (
|
||||||
|
get_file_mime_type_for_file_type,
|
||||||
|
get_file_type_from_extension,
|
||||||
|
is_gemini_1_5_accepted_file_type,
|
||||||
|
is_video_file_type,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class VertexAIError(Exception):
|
class VertexAIError(Exception):
|
||||||
|
@ -301,15 +307,15 @@ def _process_gemini_image(image_url: str) -> PartType:
|
||||||
# GCS URIs
|
# GCS URIs
|
||||||
if "gs://" in image_url:
|
if "gs://" in image_url:
|
||||||
# Figure out file type
|
# Figure out file type
|
||||||
extension_with_dot = os.path.splitext(image_url)[-1] # Ex: ".png"
|
extension_with_dot = os.path.splitext(image_url)[-1] # Ex: ".png"
|
||||||
extension = extension_with_dot[1:] # Ex: "png"
|
extension = extension_with_dot[1:] # Ex: "png"
|
||||||
|
|
||||||
file_type = get_file_type_from_extension(extension)
|
file_type = get_file_type_from_extension(extension)
|
||||||
|
|
||||||
# Validate the file type is supported by Gemini
|
# Validate the file type is supported by Gemini
|
||||||
if not is_gemini_1_5_accepted_file_type(file_type):
|
if not is_gemini_1_5_accepted_file_type(file_type):
|
||||||
raise Exception(f"File type not supported by gemini - {file_type}")
|
raise Exception(f"File type not supported by gemini - {file_type}")
|
||||||
|
|
||||||
mime_type = get_file_mime_type_for_file_type(file_type)
|
mime_type = get_file_mime_type_for_file_type(file_type)
|
||||||
file_data = FileDataType(mime_type=mime_type, file_uri=image_url)
|
file_data = FileDataType(mime_type=mime_type, file_uri=image_url)
|
||||||
|
|
||||||
|
@ -320,7 +326,7 @@ def _process_gemini_image(image_url: str) -> PartType:
|
||||||
image = _load_image_from_url(image_url)
|
image = _load_image_from_url(image_url)
|
||||||
_blob = BlobType(data=image.data, mime_type=image._mime_type)
|
_blob = BlobType(data=image.data, mime_type=image._mime_type)
|
||||||
return PartType(inline_data=_blob)
|
return PartType(inline_data=_blob)
|
||||||
|
|
||||||
# Base64 encoding
|
# Base64 encoding
|
||||||
elif "base64" in image_url:
|
elif "base64" in image_url:
|
||||||
import base64, re
|
import base64, re
|
||||||
|
@ -1293,6 +1299,95 @@ async def async_streaming(
|
||||||
return streamwrapper
|
return streamwrapper
|
||||||
|
|
||||||
|
|
||||||
|
class VertexAITextEmbeddingConfig(BaseModel):
|
||||||
|
"""
|
||||||
|
Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#TextEmbeddingInput
|
||||||
|
|
||||||
|
Args:
|
||||||
|
auto_truncate: Optional(bool) If True, will truncate input text to fit within the model's max input length.
|
||||||
|
task_type: Optional(str) The type of task to be performed. The default is "RETRIEVAL_QUERY".
|
||||||
|
title: Optional(str) The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
|
||||||
|
"""
|
||||||
|
|
||||||
|
auto_truncate: Optional[bool] = None
|
||||||
|
task_type: Optional[
|
||||||
|
Literal[
|
||||||
|
"RETRIEVAL_QUERY",
|
||||||
|
"RETRIEVAL_DOCUMENT",
|
||||||
|
"SEMANTIC_SIMILARITY",
|
||||||
|
"CLASSIFICATION",
|
||||||
|
"CLUSTERING",
|
||||||
|
"QUESTION_ANSWERING",
|
||||||
|
"FACT_VERIFICATION",
|
||||||
|
]
|
||||||
|
] = None
|
||||||
|
title: Optional[str] = None
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
auto_truncate: Optional[bool] = None,
|
||||||
|
task_type: Optional[
|
||||||
|
Literal[
|
||||||
|
"RETRIEVAL_QUERY",
|
||||||
|
"RETRIEVAL_DOCUMENT",
|
||||||
|
"SEMANTIC_SIMILARITY",
|
||||||
|
"CLASSIFICATION",
|
||||||
|
"CLUSTERING",
|
||||||
|
"QUESTION_ANSWERING",
|
||||||
|
"FACT_VERIFICATION",
|
||||||
|
]
|
||||||
|
] = None,
|
||||||
|
title: Optional[str] = None,
|
||||||
|
) -> None:
|
||||||
|
locals_ = locals()
|
||||||
|
for key, value in locals_.items():
|
||||||
|
if key != "self" and value is not None:
|
||||||
|
setattr(self.__class__, key, value)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_config(cls):
|
||||||
|
return {
|
||||||
|
k: v
|
||||||
|
for k, v in cls.__dict__.items()
|
||||||
|
if not k.startswith("__")
|
||||||
|
and not isinstance(
|
||||||
|
v,
|
||||||
|
(
|
||||||
|
types.FunctionType,
|
||||||
|
types.BuiltinFunctionType,
|
||||||
|
classmethod,
|
||||||
|
staticmethod,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
and v is not None
|
||||||
|
}
|
||||||
|
|
||||||
|
def get_supported_openai_params(self):
|
||||||
|
return [
|
||||||
|
"dimensions",
|
||||||
|
]
|
||||||
|
|
||||||
|
def map_openai_params(self, non_default_params: dict, optional_params: dict):
|
||||||
|
for param, value in non_default_params.items():
|
||||||
|
if param == "dimensions":
|
||||||
|
optional_params["output_dimensionality"] = value
|
||||||
|
return optional_params
|
||||||
|
|
||||||
|
def get_mapped_special_auth_params(self) -> dict:
|
||||||
|
"""
|
||||||
|
Common auth params across bedrock/vertex_ai/azure/watsonx
|
||||||
|
"""
|
||||||
|
return {"project": "vertex_project", "region_name": "vertex_location"}
|
||||||
|
|
||||||
|
def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
|
||||||
|
mapped_params = self.get_mapped_special_auth_params()
|
||||||
|
|
||||||
|
for param, value in non_default_params.items():
|
||||||
|
if param in mapped_params:
|
||||||
|
optional_params[mapped_params[param]] = value
|
||||||
|
return optional_params
|
||||||
|
|
||||||
|
|
||||||
def embedding(
|
def embedding(
|
||||||
model: str,
|
model: str,
|
||||||
input: Union[list, str],
|
input: Union[list, str],
|
||||||
|
@ -1316,7 +1411,7 @@ def embedding(
|
||||||
message="vertexai import failed please run `pip install google-cloud-aiplatform`",
|
message="vertexai import failed please run `pip install google-cloud-aiplatform`",
|
||||||
)
|
)
|
||||||
|
|
||||||
from vertexai.language_models import TextEmbeddingModel
|
from vertexai.language_models import TextEmbeddingModel, TextEmbeddingInput
|
||||||
import google.auth # type: ignore
|
import google.auth # type: ignore
|
||||||
|
|
||||||
## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
|
## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
|
||||||
|
@ -1347,6 +1442,16 @@ def embedding(
|
||||||
if isinstance(input, str):
|
if isinstance(input, str):
|
||||||
input = [input]
|
input = [input]
|
||||||
|
|
||||||
|
if optional_params is not None and isinstance(optional_params, dict):
|
||||||
|
if optional_params.get("task_type") or optional_params.get("title"):
|
||||||
|
# if user passed task_type or title, cast to TextEmbeddingInput
|
||||||
|
_task_type = optional_params.pop("task_type", None)
|
||||||
|
_title = optional_params.pop("title", None)
|
||||||
|
input = [
|
||||||
|
TextEmbeddingInput(text=x, task_type=_task_type, title=_title)
|
||||||
|
for x in input
|
||||||
|
]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
llm_model = TextEmbeddingModel.from_pretrained(model)
|
llm_model = TextEmbeddingModel.from_pretrained(model)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -1363,7 +1468,8 @@ def embedding(
|
||||||
encoding=encoding,
|
encoding=encoding,
|
||||||
)
|
)
|
||||||
|
|
-        request_str = f"""embeddings = llm_model.get_embeddings({input})"""
+        _input_dict = {"texts": input, **optional_params}
+        request_str = f"""embeddings = llm_model.get_embeddings({_input_dict})"""
         ## LOGGING PRE-CALL
         logging_obj.pre_call(
             input=input,
@@ -1375,7 +1481,7 @@ def embedding(
         )

         try:
-            embeddings = llm_model.get_embeddings(input)
+            embeddings = llm_model.get_embeddings(**_input_dict)
         except Exception as e:
             raise VertexAIError(status_code=500, message=str(e))

@@ -1383,6 +1489,7 @@ def embedding(
     logging_obj.post_call(input=input, api_key=None, original_response=embeddings)
     ## Populate OpenAI compliant dictionary
     embedding_response = []
+    input_tokens: int = 0
     for idx, embedding in enumerate(embeddings):
         embedding_response.append(
             {
@@ -1391,14 +1498,10 @@ def embedding(
                 "embedding": embedding.values,
             }
         )
+        input_tokens += embedding.statistics.token_count
     model_response["object"] = "list"
     model_response["data"] = embedding_response
     model_response["model"] = model
-    input_tokens = 0
-
-    input_str = "".join(input)
-
-    input_tokens += len(encoding.encode(input_str))
-
     usage = Usage(
         prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
@@ -1420,7 +1523,8 @@ async def async_embedding(
     """
     Async embedding implementation
     """
-    request_str = f"""embeddings = llm_model.get_embeddings({input})"""
+    _input_dict = {"texts": input, **optional_params}
+    request_str = f"""embeddings = llm_model.get_embeddings({_input_dict})"""
     ## LOGGING PRE-CALL
     logging_obj.pre_call(
         input=input,
@@ -1432,7 +1536,7 @@ async def async_embedding(
     )

     try:
-        embeddings = await client.get_embeddings_async(input)
+        embeddings = await client.get_embeddings_async(**_input_dict)
     except Exception as e:
         raise VertexAIError(status_code=500, message=str(e))

@@ -1440,6 +1544,7 @@ async def async_embedding(
     logging_obj.post_call(input=input, api_key=None, original_response=embeddings)
     ## Populate OpenAI compliant dictionary
     embedding_response = []
+    input_tokens: int = 0
     for idx, embedding in enumerate(embeddings):
         embedding_response.append(
             {
@@ -1448,18 +1553,13 @@ async def async_embedding(
                 "embedding": embedding.values,
             }
         )
+        input_tokens += embedding.statistics.token_count
-
     model_response["object"] = "list"
     model_response["data"] = embedding_response
     model_response["model"] = model
-    input_tokens = 0
-
-    input_str = "".join(input)
-
-    input_tokens += len(encoding.encode(input_str))
-
     usage = Usage(
         prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
     )
     model_response.usage = usage

     return model_response
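A minimal usage sketch of what this change enables (the GCP project and region below are placeholder values, not part of the diff): provider-specific embedding parameters are now forwarded to `get_embeddings` via `_input_dict`, and prompt tokens are taken from the SDK's `statistics` instead of re-tokenizing the input locally.

```python
# Hedged sketch: Vertex AI embeddings through litellm with the new param forwarding.
import litellm

response = litellm.embedding(
    model="vertex_ai/text-embedding-004",
    input=["good morning from litellm"],
    vertex_project="my-gcp-project",   # placeholder project id
    vertex_location="us-central1",     # placeholder region
)

# prompt_tokens now reflects embedding.statistics.token_count from the Vertex SDK
print(response.usage.prompt_tokens)
```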
@@ -11,10 +11,10 @@ import os, openai, sys, json, inspect, uuid, datetime, threading
 from typing import Any, Literal, Union, BinaryIO
 from typing_extensions import overload
 from functools import partial

 import dotenv, traceback, random, asyncio, time, contextvars
 from copy import deepcopy
 import httpx

 import litellm
 from ._logging import verbose_logger
 from litellm import (  # type: ignore
@@ -335,6 +335,7 @@ async def acompletion(
         or custom_llm_provider == "predibase"
         or custom_llm_provider == "bedrock"
         or custom_llm_provider == "databricks"
+        or custom_llm_provider == "clarifai"
         or custom_llm_provider in litellm.openai_compatible_providers
     ):  # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all.
         init_response = await loop.run_in_executor(None, func_with_context)
@@ -1387,6 +1387,26 @@
         "mode": "image_generation",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
     },
+    "text-embedding-004": {
+        "max_tokens": 3072,
+        "max_input_tokens": 3072,
+        "output_vector_size": 768,
+        "input_cost_per_token": 0.00000000625,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
+    },
+    "text-multilingual-embedding-002": {
+        "max_tokens": 2048,
+        "max_input_tokens": 2048,
+        "output_vector_size": 768,
+        "input_cost_per_token": 0.00000000625,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
+    },
     "textembedding-gecko": {
         "max_tokens": 3072,
         "max_input_tokens": 3072,
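For reference, a small sketch (not part of the diff) of how these new entries surface at runtime through litellm's public model cost map; the keys mirror the JSON above.

```python
# Sketch: looking up the new Vertex AI embedding entries in the model cost map.
import litellm

entry = litellm.model_cost.get("text-embedding-004", {})
print(entry.get("mode"))                  # expected: "embedding"
print(entry.get("input_cost_per_token"))  # expected: 6.25e-09, per the table above
```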
File diff suppressed because one or more lines are too long
(Regenerated Admin UI build artifacts: the prerendered dashboard HTML and the index, model_hub, and onboarding RSC payload files are rebuilt. The only substantive changes are the Next.js buildId, "tghLG7_IS7i5OkQJRvCIl" -> "48nWsJi-LJrUlOLzcK-Yz", and the main page chunk, "page-d61796ff0d3a8faf.js" -> "page-bd882aee817406ff.js"; the remaining minified markup and payload text is unchanged.)
@@ -1,7 +1,12 @@
 import json
 import logging
 from logging import Formatter
-import sys
+import os
+from litellm import json_logs
+
+# Set default log level to INFO
+log_level = os.getenv("LITELLM_LOG", "INFO")
+numeric_level: str = getattr(logging, log_level.upper())


 class JsonFormatter(Formatter):
@@ -16,6 +21,14 @@ class JsonFormatter(Formatter):

 logger = logging.root
 handler = logging.StreamHandler()
-handler.setFormatter(JsonFormatter())
+if json_logs:
+    handler.setFormatter(JsonFormatter())
+else:
+    formatter = logging.Formatter(
+        "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s",
+        datefmt="%H:%M:%S",
+    )
+
+    handler.setFormatter(formatter)
 logger.handlers = [handler]
-logger.setLevel(logging.INFO)
+logger.setLevel(numeric_level)
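A small usage sketch, assuming litellm is imported after the environment variable is set (the default resolution above happens at import time):

```python
# Sketch: choosing the root log level via LITELLM_LOG before importing litellm.
import os

os.environ["LITELLM_LOG"] = "DEBUG"  # any standard logging level name

import litellm  # noqa: E402  (imported after setting the env var on purpose)
```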
@@ -719,6 +719,8 @@ class Member(LiteLLMBase):
     @model_validator(mode="before")
     @classmethod
     def check_user_info(cls, values):
+        if not isinstance(values, dict):
+            raise ValueError("input needs to be a dictionary")
         if values.get("user_id") is None and values.get("user_email") is None:
             raise ValueError("Either user id or user email must be provided")
         return values
@@ -757,9 +759,24 @@ class GlobalEndUsersSpend(LiteLLMBase):

 class TeamMemberAddRequest(LiteLLMBase):
     team_id: str
-    member: Member
+    member: Union[List[Member], Member]
     max_budget_in_team: Optional[float] = None  # Users max budget within the team

+    def __init__(self, **data):
+        member_data = data.get("member")
+        if isinstance(member_data, list):
+            # If member is a list of dictionaries, convert each dictionary to a Member object
+            members = [Member(**item) for item in member_data]
+            # Replace member_data with the list of Member objects
+            data["member"] = members
+        elif isinstance(member_data, dict):
+            # If member is a dictionary, convert it to a single Member object
+            member = Member(**member_data)
+            # Replace member_data with the single Member object
+            data["member"] = member
+        # Call the superclass __init__ method to initialize the object
+        super().__init__(**data)
+

 class TeamMemberDeleteRequest(LiteLLMBase):
     team_id: str
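A hedged sketch of the request shapes this now accepts (ids and emails below are hypothetical; `Member` still requires either a `user_id` or a `user_email`):

```python
# Sketch: TeamMemberAddRequest now coerces both a single member dict and a list of dicts.
from litellm.proxy._types import TeamMemberAddRequest

single = TeamMemberAddRequest(
    team_id="team-123",  # hypothetical team id
    member={"role": "user", "user_email": "alice@example.com"},
)

bulk = TeamMemberAddRequest(
    team_id="team-123",
    member=[
        {"role": "user", "user_email": "alice@example.com"},
        {"role": "user", "user_id": "user-42"},
    ],
    max_budget_in_team=25.0,
)
```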
@@ -1472,6 +1489,9 @@ class SpendLogsMetadata(TypedDict):
     user_api_key_team_id: Optional[str]
     user_api_key_user_id: Optional[str]
     user_api_key_team_alias: Optional[str]
+    spend_logs_metadata: Optional[
+        dict
+    ]  # special param to log k,v pairs to spendlogs for a call


 class SpendLogsPayload(TypedDict):
@@ -1496,3 +1516,60 @@ class SpendLogsPayload(TypedDict):
     request_tags: str  # json str
     team_id: Optional[str]
     end_user: Optional[str]
+
+
+class SpanAttributes(str, enum.Enum):
+    # Note: We've taken this from opentelemetry-semantic-conventions-ai
+    # I chose to not add a new dependency to litellm for this
+
+    # Semantic Conventions for LLM requests, this needs to be removed after
+    # OpenTelemetry Semantic Conventions support Gen AI.
+    # Issue at https://github.com/open-telemetry/opentelemetry-python/issues/3868
+    # Refer to https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/llm-spans.md
+
+    LLM_SYSTEM = "gen_ai.system"
+    LLM_REQUEST_MODEL = "gen_ai.request.model"
+    LLM_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
+    LLM_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
+    LLM_REQUEST_TOP_P = "gen_ai.request.top_p"
+    LLM_PROMPTS = "gen_ai.prompt"
+    LLM_COMPLETIONS = "gen_ai.completion"
+    LLM_RESPONSE_MODEL = "gen_ai.response.model"
+    LLM_USAGE_COMPLETION_TOKENS = "gen_ai.usage.completion_tokens"
+    LLM_USAGE_PROMPT_TOKENS = "gen_ai.usage.prompt_tokens"
+    LLM_TOKEN_TYPE = "gen_ai.token.type"
+    # To be added
+    # LLM_RESPONSE_FINISH_REASON = "gen_ai.response.finish_reasons"
+    # LLM_RESPONSE_ID = "gen_ai.response.id"
+
+    # LLM
+    LLM_REQUEST_TYPE = "llm.request.type"
+    LLM_USAGE_TOTAL_TOKENS = "llm.usage.total_tokens"
+    LLM_USAGE_TOKEN_TYPE = "llm.usage.token_type"
+    LLM_USER = "llm.user"
+    LLM_HEADERS = "llm.headers"
+    LLM_TOP_K = "llm.top_k"
+    LLM_IS_STREAMING = "llm.is_streaming"
+    LLM_FREQUENCY_PENALTY = "llm.frequency_penalty"
+    LLM_PRESENCE_PENALTY = "llm.presence_penalty"
+    LLM_CHAT_STOP_SEQUENCES = "llm.chat.stop_sequences"
+    LLM_REQUEST_FUNCTIONS = "llm.request.functions"
+    LLM_REQUEST_REPETITION_PENALTY = "llm.request.repetition_penalty"
+    LLM_RESPONSE_FINISH_REASON = "llm.response.finish_reason"
+    LLM_RESPONSE_STOP_REASON = "llm.response.stop_reason"
+    LLM_CONTENT_COMPLETION_CHUNK = "llm.content.completion.chunk"
+
+    # OpenAI
+    LLM_OPENAI_RESPONSE_SYSTEM_FINGERPRINT = "gen_ai.openai.system_fingerprint"
+    LLM_OPENAI_API_BASE = "gen_ai.openai.api_base"
+    LLM_OPENAI_API_VERSION = "gen_ai.openai.api_version"
+    LLM_OPENAI_API_TYPE = "gen_ai.openai.api_type"
+
+
+class ManagementEndpointLoggingPayload(LiteLLMBase):
+    route: str
+    request_data: dict
+    response: Optional[dict] = None
+    exception: Optional[Any] = None
+    start_time: Optional[datetime] = None
+    end_time: Optional[datetime] = None
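For context, a minimal sketch (not from the diff) of how an OpenTelemetry integration can attach these vendored attribute names to a span; the tracer setup is assumed to exist elsewhere.

```python
# Sketch: using the vendored SpanAttributes names on an OpenTelemetry span.
from opentelemetry import trace

from litellm.proxy._types import SpanAttributes

tracer = trace.get_tracer("litellm-example")  # assumed tracer provider already configured

with tracer.start_as_current_span("litellm_request") as span:
    span.set_attribute(SpanAttributes.LLM_REQUEST_MODEL.value, "gpt-3.5-turbo")
    span.set_attribute(SpanAttributes.LLM_USAGE_TOTAL_TOKENS.value, 42)
```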
@@ -151,8 +151,8 @@ def common_checks(
         and route != "/models"
     ):
         if global_proxy_spend > litellm.max_budget:
-            raise Exception(
-                f"ExceededBudget: LiteLLM Proxy has exceeded its budget. Current spend: {global_proxy_spend}; Max Budget: {litellm.max_budget}"
+            raise litellm.BudgetExceededError(
+                current_cost=global_proxy_spend, max_budget=litellm.max_budget
             )
     return True
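A small sketch of what callers can rely on after this change, assuming `BudgetExceededError` exposes the two fields it is constructed with here:

```python
# Sketch: proxy budget failures are now a typed litellm exception instead of a bare Exception.
import litellm

try:
    ...  # some call that runs common_checks / user_api_key_auth
except litellm.BudgetExceededError as err:
    # current_cost / max_budget mirror the constructor kwargs above (assumed attributes)
    print(f"over budget: spent {err.current_cost} of {err.max_budget}")
```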
litellm/proxy/common_utils/http_parsing_utils.py (new file, 31 lines)
@@ -0,0 +1,31 @@
+from typing import Optional
+from fastapi import Request
+import ast, json
+
+
+async def _read_request_body(request: Optional[Request]) -> dict:
+    """
+    Asynchronous function to read the request body and parse it as JSON or literal data.
+
+    Parameters:
+    - request: The request object to read the body from
+
+    Returns:
+    - dict: Parsed request data as a dictionary
+    """
+    try:
+        request_data: dict = {}
+        if request is None:
+            return request_data
+        body = await request.body()
+
+        if body == b"" or body is None:
+            return request_data
+        body_str = body.decode()
+        try:
+            request_data = ast.literal_eval(body_str)
+        except:
+            request_data = json.loads(body_str)
+        return request_data
+    except:
+        return {}
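A usage sketch for the new helper, assuming it is called from an async FastAPI handler the same way the management endpoints do (the route below is illustrative only):

```python
# Sketch: parsing a request body with the new helper inside a FastAPI route.
from fastapi import FastAPI, Request

from litellm.proxy.common_utils.http_parsing_utils import _read_request_body

app = FastAPI()


@app.post("/example")  # hypothetical route for illustration
async def example(request: Request):
    data = await _read_request_body(request=request)  # returns {} on empty/unparsable bodies
    return {"received_keys": list(data.keys())}
```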
litellm/proxy/common_utils/management_endpoint_utils.py (new file, 90 lines)
@@ -0,0 +1,90 @@
+from datetime import datetime
+from functools import wraps
+from litellm.proxy._types import UserAPIKeyAuth, ManagementEndpointLoggingPayload
+from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
+from fastapi import Request
+
+
+def management_endpoint_wrapper(func):
+    """
+    This wrapper does the following:
+
+    1. Log I/O, Exceptions to OTEL
+    2. Create an Audit log for success calls
+    """
+
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
+        start_time = datetime.now()
+
+        try:
+            result = await func(*args, **kwargs)
+            end_time = datetime.now()
+
+            if kwargs is None:
+                kwargs = {}
+            user_api_key_dict: UserAPIKeyAuth = (
+                kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
+            )
+            parent_otel_span = user_api_key_dict.parent_otel_span
+            if parent_otel_span is not None:
+                from litellm.proxy.proxy_server import open_telemetry_logger
+
+                if open_telemetry_logger is not None:
+                    _http_request: Request = kwargs.get("http_request")
+
+                    _route = _http_request.url.path
+                    _request_body: dict = await _read_request_body(
+                        request=_http_request
+                    )
+                    _response = dict(result) if result is not None else None
+
+                    logging_payload = ManagementEndpointLoggingPayload(
+                        route=_route,
+                        request_data=_request_body,
+                        response=_response,
+                        start_time=start_time,
+                        end_time=end_time,
+                    )
+
+                    await open_telemetry_logger.async_management_endpoint_success_hook(
+                        logging_payload=logging_payload,
+                        parent_otel_span=parent_otel_span,
+                    )
+
+            return result
+        except Exception as e:
+            end_time = datetime.now()
+
+            if kwargs is None:
+                kwargs = {}
+            user_api_key_dict: UserAPIKeyAuth = (
+                kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
+            )
+            parent_otel_span = user_api_key_dict.parent_otel_span
+            if parent_otel_span is not None:
+                from litellm.proxy.proxy_server import open_telemetry_logger
+
+                if open_telemetry_logger is not None:
+                    _http_request: Request = kwargs.get("http_request")
+                    _route = _http_request.url.path
+                    _request_body: dict = await _read_request_body(
+                        request=_http_request
+                    )
+                    logging_payload = ManagementEndpointLoggingPayload(
+                        route=_route,
+                        request_data=_request_body,
+                        response=None,
+                        start_time=start_time,
+                        end_time=end_time,
+                        exception=e,
+                    )
+
+                    await open_telemetry_logger.async_management_endpoint_failure_hook(
+                        logging_payload=logging_payload,
+                        parent_otel_span=parent_otel_span,
+                    )
+
+            raise e
+
+    return wrapper
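A hedged sketch of how a management endpoint opts into this logging; the route name is illustrative, but the decorator expects `http_request` and `user_api_key_dict` to arrive as keyword arguments so it can read them from `kwargs`:

```python
# Sketch: decorating a management endpoint so success/failure is logged to OTEL.
from fastapi import APIRouter, Depends, Request

from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.common_utils.management_endpoint_utils import (
    management_endpoint_wrapper,
)
from litellm.proxy.proxy_server import user_api_key_auth

router = APIRouter()


@router.post("/team/example_update")  # hypothetical route for illustration
@management_endpoint_wrapper
async def example_update(
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    return {"status": "ok"}
```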
@@ -79,10 +79,6 @@ async def add_litellm_data_to_request(
             data["cache"][k] = v

     verbose_proxy_logger.debug("receiving data: %s", data)
-    # users can pass in 'user' param to /chat/completions. Don't override it
-    if data.get("user", None) is None and user_api_key_dict.user_id is not None:
-        # if users are using user_api_key_auth, set `user` in `data`
-        data["user"] = user_api_key_dict.user_id

     if "metadata" not in data:
         data["metadata"] = {}
litellm/proxy/management_helpers/utils.py (new file, 63 lines)
@@ -0,0 +1,63 @@
+# What is this?
+## Helper utils for the management endpoints (keys/users/teams)
+
+from litellm.proxy._types import LiteLLM_TeamTable, Member, UserAPIKeyAuth
+from litellm.proxy.utils import PrismaClient
+import uuid
+from typing import Optional
+
+
+async def add_new_member(
+    new_member: Member,
+    max_budget_in_team: Optional[float],
+    prisma_client: PrismaClient,
+    team_id: str,
+    user_api_key_dict: UserAPIKeyAuth,
+    litellm_proxy_admin_name: str,
+):
+    """
+    Add a new member to a team
+
+    - add team id to user table
+    - add team member w/ budget to team member table
+    """
+    ## ADD TEAM ID, to USER TABLE IF NEW ##
+    if new_member.user_id is not None:
+        await prisma_client.db.litellm_usertable.update(
+            where={"user_id": new_member.user_id},
+            data={"teams": {"push": [team_id]}},
+        )
+    elif new_member.user_email is not None:
+        user_data = {"user_id": str(uuid.uuid4()), "user_email": new_member.user_email}
+        ## user email is not unique acc. to prisma schema -> future improvement
+        ### for now: check if it exists in db, if not - insert it
+        existing_user_row = await prisma_client.get_data(
+            key_val={"user_email": new_member.user_email},
+            table_name="user",
+            query_type="find_all",
+        )
+        if existing_user_row is None or (
+            isinstance(existing_user_row, list) and len(existing_user_row) == 0
+        ):
+            await prisma_client.insert_data(data=user_data, table_name="user")
+
+    # Check if trying to set a budget for team member
+    if max_budget_in_team is not None and new_member.user_id is not None:
+        # create a new budget item for this member
+        response = await prisma_client.db.litellm_budgettable.create(
+            data={
+                "max_budget": max_budget_in_team,
+                "created_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
+                "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
+            }
+        )
+
+        _budget_id = response.budget_id
+        await prisma_client.db.litellm_teammembership.create(
+            data={
+                "team_id": team_id,
+                "user_id": new_member.user_id,
+                "budget_id": _budget_id,
+            }
+        )
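For orientation, a hedged sketch of how the team endpoints are expected to fan a (possibly bulk) add request out to this helper; the surrounding objects and the admin name are assumed to come from the running proxy.

```python
# Sketch: calling add_new_member once per member of a bulk TeamMemberAddRequest.
from litellm.proxy.management_helpers.utils import add_new_member


async def add_members(data, prisma_client, user_api_key_dict):
    # data is a TeamMemberAddRequest; member may be a single Member or a list of Members
    members = data.member if isinstance(data.member, list) else [data.member]
    for m in members:
        await add_new_member(
            new_member=m,
            max_budget_in_team=data.max_budget_in_team,
            prisma_client=prisma_client,
            team_id=data.team_id,
            user_api_key_dict=user_api_key_dict,
            litellm_proxy_admin_name="default_user_id",  # placeholder admin name
        )
```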
@@ -14,10 +14,9 @@ model_list:
     litellm_params:
       model: openai/*
       api_key: os.environ/OPENAI_API_KEY
-  - model_name: my-triton-model
+  - model_name: mistral-embed
     litellm_params:
-      model: triton/any"
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/triton/embeddings
+      model: mistral/mistral-embed

 general_settings:
   master_key: sk-1234
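With the config above, a client can hit the new `mistral-embed` alias through the proxy. A hedged sketch using the OpenAI-compatible endpoint; the base URL and key match the example config, not a production setup, and the proxy is assumed to have `MISTRAL_API_KEY` set in its environment.

```python
# Sketch: calling the proxy's mistral-embed deployment via the OpenAI-compatible API.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

resp = client.embeddings.create(
    model="mistral-embed",
    input=["hello from litellm"],
)
print(len(resp.data[0].embedding))
```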
@@ -90,6 +90,7 @@ from litellm.types.llms.openai import (
     HttpxBinaryResponseContent,
 )
 from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
+from litellm.proxy.management_helpers.utils import add_new_member
 from litellm.proxy.utils import (
     PrismaClient,
     DBClient,
@@ -102,7 +103,6 @@ from litellm.proxy.utils import (
     hash_token,
     html_form,
     missing_keys_html_form,
-    _read_request_body,
     _is_valid_team_configs,
     _is_user_proxy_admin,
     _get_user_role,
@@ -114,6 +114,8 @@ from litellm.proxy.utils import (
     _to_ns,
     get_error_message_str,
 )
+from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
+
 from litellm import (
     CreateBatchRequest,
     RetrieveBatchRequest,
@@ -160,6 +162,10 @@ from litellm.proxy.auth.auth_checks import (
     get_user_object,
     allowed_routes_check,
     get_actual_routes,
+    log_to_opentelemetry,
+)
+from litellm.proxy.common_utils.management_endpoint_utils import (
+    management_endpoint_wrapper,
 )
 from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
 from litellm.exceptions import RejectedRequestError
@@ -368,6 +374,11 @@ from typing import Dict
 api_key_header = APIKeyHeader(
     name="Authorization", auto_error=False, description="Bearer token"
 )
+azure_api_key_header = APIKeyHeader(
+    name="API-Key",
+    auto_error=False,
+    description="Some older versions of the openai Python package will send an API-Key header with just the API key ",
+)
 user_api_base = None
 user_model = None
 user_debug = False
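A brief sketch of the request shape this header accepts, assuming the proxy is running locally with the example master key from the sample config:

```python
# Sketch: older Azure-style clients send the key in an "API-Key" header instead of
# "Authorization: Bearer ..."; the proxy now accepts either form.
import httpx

resp = httpx.post(
    "http://0.0.0.0:4000/chat/completions",
    headers={"API-Key": "sk-1234"},  # example master key, not a real credential
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "hello"}],
    },
)
print(resp.status_code)
```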
@@ -508,18 +519,27 @@ async def check_request_disconnection(request: Request, llm_api_call_task):


 async def user_api_key_auth(
-    request: Request, api_key: str = fastapi.Security(api_key_header)
+    request: Request,
+    api_key: str = fastapi.Security(api_key_header),
+    azure_api_key_header: str = fastapi.Security(azure_api_key_header),
 ) -> UserAPIKeyAuth:
     global master_key, prisma_client, llm_model_list, user_custom_auth, custom_db_client, general_settings, proxy_logging_obj
     try:
         if isinstance(api_key, str):
             passed_in_key = api_key
             api_key = _get_bearer_token(api_key=api_key)
+
+        elif isinstance(azure_api_key_header, str):
+            api_key = azure_api_key_header
+
         parent_otel_span: Optional[Span] = None
         if open_telemetry_logger is not None:
             parent_otel_span = open_telemetry_logger.tracer.start_span(
                 name="Received Proxy Server Request",
                 start_time=_to_ns(datetime.now()),
+                context=open_telemetry_logger.get_traceparent_from_header(
+                    headers=request.headers
+                ),
             )
         ### USER-DEFINED AUTH FUNCTION ###
         if user_custom_auth is not None:
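A hedged sketch of what the new `context=` argument enables: if the incoming request carries a W3C `traceparent` header, the proxy's auth span is parented to the caller's trace. The header value below is a made-up example of the W3C format, not a real trace.

```python
# Sketch: propagating an existing trace into the proxy via the W3C traceparent header.
import httpx

headers = {
    "Authorization": "Bearer sk-1234",
    # format: version-traceid-spanid-flags (example values only)
    "traceparent": "00-80e1afed08e019fc1110464cfa66635c-7a085853722dc6d2-01",
}

resp = httpx.post(
    "http://0.0.0.0:4000/chat/completions",
    headers=headers,
    json={"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]},
)
```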
@@ -1062,8 +1082,9 @@ async def user_api_key_auth(

                     _user_id = _user.get("user_id", None)
                     if user_current_spend > user_max_budget:
-                        raise Exception(
-                            f"ExceededBudget: User {_user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}"
+                        raise litellm.BudgetExceededError(
+                            current_cost=user_current_spend,
+                            max_budget=user_max_budget,
                         )
             else:
                 # Token exists, not expired now check if its in budget for the user
@@ -1094,9 +1115,11 @@ async def user_api_key_auth(
                     )

                     if user_current_spend > user_max_budget:
-                        raise Exception(
-                            f"ExceededBudget: User {valid_token.user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}"
+                        raise litellm.BudgetExceededError(
+                            current_cost=user_current_spend,
+                            max_budget=user_max_budget,
                         )

         # Check 3. Check if user is in their team budget
         if valid_token.team_member_spend is not None:
             if prisma_client is not None:
@@ -1130,8 +1153,9 @@ async def user_api_key_auth(
                 )
                 if team_member_budget is not None and team_member_budget > 0:
                     if valid_token.team_member_spend > team_member_budget:
-                        raise Exception(
-                            f"ExceededBudget: Crossed spend within team. UserID: {valid_token.user_id}, in team {valid_token.team_id} has exceeded their budget. Current spend: {valid_token.team_member_spend}; Max Budget: {team_member_budget}"
+                        raise litellm.BudgetExceededError(
+                            current_cost=valid_token.team_member_spend,
+                            max_budget=team_member_budget,
                         )

         # Check 3. If token is expired
@@ -1189,8 +1213,9 @@ async def user_api_key_auth(
             ####################################

             if valid_token.spend >= valid_token.max_budget:
-                raise Exception(
-                    f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}"
+                raise litellm.BudgetExceededError(
+                    current_cost=valid_token.spend,
+                    max_budget=valid_token.max_budget,
                 )

         # Check 5. Token Model Spend is under Model budget
@@ -1226,8 +1251,9 @@ async def user_api_key_auth(
             ):
                 current_model_spend = model_spend[0]["_sum"]["spend"]
                 current_model_budget = max_budget_per_model[current_model]
-                raise Exception(
-                    f"ExceededModelBudget: Current spend for model: {current_model_spend}; Max Budget for Model: {current_model_budget}"
+                raise litellm.BudgetExceededError(
+                    current_cost=current_model_spend,
+                    max_budget=current_model_budget,
                 )

         # Check 6. Team spend is under Team budget
@@ -1251,8 +1277,9 @@ async def user_api_key_auth(
             )

             if valid_token.team_spend >= valid_token.team_max_budget:
-                raise Exception(
-                    f"ExceededTokenBudget: Current Team Spend: {valid_token.team_spend}; Max Budget for Team: {valid_token.team_max_budget}"
+                raise litellm.BudgetExceededError(
+                    current_cost=valid_token.team_spend,
+                    max_budget=valid_token.team_max_budget,
                 )

         # Check 8: Additional Common Checks across jwt + key auth
@ -1495,7 +1522,7 @@ async def user_api_key_auth(
|
||||||
)
|
)
|
||||||
if valid_token is None:
|
if valid_token is None:
|
||||||
# No token was found when looking up in the DB
|
# No token was found when looking up in the DB
|
||||||
raise Exception("Invalid token passed")
|
raise Exception("Invalid proxy server token passed")
|
||||||
if valid_token_dict is not None:
|
if valid_token_dict is not None:
|
||||||
if user_id_information is not None and _is_user_proxy_admin(
|
if user_id_information is not None and _is_user_proxy_admin(
|
||||||
user_id_information
|
user_id_information
|
||||||
|
@ -1528,6 +1555,14 @@ async def user_api_key_auth(
|
||||||
str(e)
|
str(e)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Log this exception to OTEL
|
||||||
|
if open_telemetry_logger is not None:
|
||||||
|
await open_telemetry_logger.async_post_call_failure_hook(
|
||||||
|
original_exception=e,
|
||||||
|
user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span),
|
||||||
|
)
|
||||||
|
|
||||||
verbose_proxy_logger.debug(traceback.format_exc())
|
verbose_proxy_logger.debug(traceback.format_exc())
|
||||||
if isinstance(e, litellm.BudgetExceededError):
|
if isinstance(e, litellm.BudgetExceededError):
|
||||||
raise ProxyException(
|
raise ProxyException(
|
||||||
|
@ -7803,6 +7838,10 @@ async def get_global_spend_report(
        default=None,
        description="Time till which to view spend",
    ),
+    group_by: Optional[Literal["team", "customer"]] = fastapi.Query(
+        default="team",
+        description="Group spend by internal team or customer",
+    ),
):
    """
    Get Daily Spend per Team, based on specific startTime and endTime. Per team, view usage by each key, model

@ -7849,69 +7888,130 @@ async def get_global_spend_report(
                f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
            )

-        # first get data from spend logs -> SpendByModelApiKey
-        # then read data from "SpendByModelApiKey" to format the response obj
-        sql_query = """
+        if group_by == "team":
+            # first get data from spend logs -> SpendByModelApiKey
+            # then read data from "SpendByModelApiKey" to format the response obj
+            sql_query = """

        WITH SpendByModelApiKey AS (
            SELECT
                date_trunc('day', sl."startTime") AS group_by_day,
                COALESCE(tt.team_alias, 'Unassigned Team') AS team_name,
                sl.model,
                sl.api_key,
                SUM(sl.spend) AS model_api_spend,
                SUM(sl.total_tokens) AS model_api_tokens
            FROM
                "LiteLLM_SpendLogs" sl
            LEFT JOIN
                "LiteLLM_TeamTable" tt
            ON
                sl.team_id = tt.team_id
            WHERE
                sl."startTime" BETWEEN $1::date AND $2::date
            GROUP BY
                date_trunc('day', sl."startTime"),
                tt.team_alias,
                sl.model,
                sl.api_key
        )
+            SELECT
+                group_by_day,
+                jsonb_agg(jsonb_build_object(
+                    'team_name', team_name,
+                    'total_spend', total_spend,
+                    'metadata', metadata
+                )) AS teams
+            FROM (
+                SELECT
+                    group_by_day,
+                    team_name,
+                    SUM(model_api_spend) AS total_spend,
+                    jsonb_agg(jsonb_build_object(
+                        'model', model,
+                        'api_key', api_key,
+                        'spend', model_api_spend,
+                        'total_tokens', model_api_tokens
+                    )) AS metadata
+                FROM
+                    SpendByModelApiKey
+                GROUP BY
+                    group_by_day,
+                    team_name
+            ) AS aggregated
+            GROUP BY
+                group_by_day
+            ORDER BY
+                group_by_day;
+            """
+
+            db_response = await prisma_client.db.query_raw(
+                sql_query, start_date_obj, end_date_obj
+            )
+            if db_response is None:
+                return []
+
+            return db_response
+
+        elif group_by == "customer":
+            sql_query = """
+
+        WITH SpendByModelApiKey AS (
+            SELECT
+                date_trunc('day', sl."startTime") AS group_by_day,
+                sl.end_user AS customer,
+                sl.model,
+                sl.api_key,
+                SUM(sl.spend) AS model_api_spend,
+                SUM(sl.total_tokens) AS model_api_tokens
+            FROM
+                "LiteLLM_SpendLogs" sl
+            WHERE
+                sl."startTime" BETWEEN $1::date AND $2::date
+            GROUP BY
+                date_trunc('day', sl."startTime"),
+                customer,
+                sl.model,
+                sl.api_key
+        )
-        SELECT
-            group_by_day,
-            jsonb_agg(jsonb_build_object(
-                'team_name', team_name,
-                'total_spend', total_spend,
-                'metadata', metadata
-            )) AS teams
-        FROM (
-            SELECT
-                group_by_day,
-                team_name,
-                SUM(model_api_spend) AS total_spend,
-                jsonb_agg(jsonb_build_object(
-                    'model', model,
-                    'api_key', api_key,
-                    'spend', model_api_spend,
-                    'total_tokens', model_api_tokens
-                )) AS metadata
-            FROM
-                SpendByModelApiKey
-            GROUP BY
-                group_by_day,
-                team_name
-        ) AS aggregated
+        SELECT
+            group_by_day,
+            jsonb_agg(jsonb_build_object(
+                'customer', customer,
+                'total_spend', total_spend,
+                'metadata', metadata
+            )) AS customers
+        FROM
+            (
+                SELECT
+                    group_by_day,
+                    customer,
+                    SUM(model_api_spend) AS total_spend,
+                    jsonb_agg(jsonb_build_object(
+                        'model', model,
+                        'api_key', api_key,
+                        'spend', model_api_spend,
+                        'total_tokens', model_api_tokens
+                    )) AS metadata
+                FROM
+                    SpendByModelApiKey
+                GROUP BY
+                    group_by_day,
+                    customer
+            ) AS aggregated
        GROUP BY
            group_by_day
        ORDER BY
            group_by_day;
        """

        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj
        )
        if db_response is None:
            return []

        return db_response

    except Exception as e:
        raise HTTPException(
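A quick way to exercise the new `group_by` parameter once this change is deployed. This is a hedged sketch only: the route path, query-parameter names, base URL, and the `sk-1234` key below are assumptions for illustration and are not taken from this diff.

```python
# Hypothetical call against a locally running proxy; adjust URL, dates, and key.
import requests

resp = requests.get(
    "http://0.0.0.0:4000/global/spend/report",  # assumed route for get_global_spend_report
    headers={"Authorization": "Bearer sk-1234"},
    params={
        "start_date": "2024-06-01",
        "end_date": "2024-06-07",
        "group_by": "customer",  # new parameter added above; defaults to "team"
    },
)
print(resp.json())
```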
@ -8097,7 +8197,9 @@ async def _get_spend_report_for_time_range(

        return response, spend_per_tag
    except Exception as e:
-        verbose_proxy_logger.error("Exception in _get_daily_spend_reports", e)  # noqa
+        verbose_proxy_logger.error(
+            "Exception in _get_daily_spend_reports {}".format(str(e))
+        )  # noqa


@router.post(

@ -8755,7 +8857,7 @@ async def new_user(data: NewUserRequest):
    - organization_id: Optional[str] - specify the org a user belongs to.
    - user_email: Optional[str] - Specify a user email.
    - send_invite_email: Optional[bool] - Specify if an invite email should be sent.
-    - user_role: Optional[str] - Specify a user role - "admin", "app_owner", "app_user"
+    - user_role: Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/litellm/proxy/_types.py#L20`
    - max_budget: Optional[float] - Specify max budget for a given user.
    - models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
    - tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute)

@ -8790,7 +8892,10 @@ async def new_user(data: NewUserRequest):
                role="user",
                user_email=data_json.get("user_email", None),
            ),
-        )
+        ),
+        http_request=Request(
+            scope={"type": "http"},
+        ),
    )

    if data.send_invite_email is True:

@ -9823,8 +9928,10 @@ async def delete_end_user(
    dependencies=[Depends(user_api_key_auth)],
    response_model=LiteLLM_TeamTable,
)
+@management_endpoint_wrapper
async def new_team(
    data: NewTeamRequest,
+    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    litellm_changed_by: Optional[str] = Header(
        None,

@ -10058,6 +10165,7 @@ async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
@router.post(
    "/team/update", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
+@management_endpoint_wrapper
async def update_team(
    data: UpdateTeamRequest,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),

@ -10163,8 +10271,10 @@ async def update_team(
    tags=["team management"],
    dependencies=[Depends(user_api_key_auth)],
)
+@management_endpoint_wrapper
async def team_member_add(
    data: TeamMemberAddRequest,
+    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """

@ -10190,10 +10300,12 @@ async def team_member_add(
        raise HTTPException(status_code=400, detail={"error": "No team id passed in"})

    if data.member is None:
-        raise HTTPException(status_code=400, detail={"error": "No member passed in"})
+        raise HTTPException(
+            status_code=400, detail={"error": "No member/members passed in"}
+        )

-    existing_team_row = await prisma_client.get_data(  # type: ignore
-        team_id=data.team_id, table_name="team", query_type="find_unique"
+    existing_team_row = await prisma_client.db.litellm_teamtable.find_unique(
+        where={"team_id": data.team_id}
    )
    if existing_team_row is None:
        raise HTTPException(

@ -10203,75 +10315,50 @@ async def team_member_add(
        },
    )

-    new_member = data.member
-
-    existing_team_row.members_with_roles.append(new_member)
-
-    complete_team_data = LiteLLM_TeamTable(
-        **_get_pydantic_json_dict(existing_team_row),
-    )
+    complete_team_data = LiteLLM_TeamTable(**existing_team_row.model_dump())
+
+    if isinstance(data.member, Member):
+        # add to team db
+        new_member = data.member
+
+        complete_team_data.members_with_roles.append(new_member)
+
+    elif isinstance(data.member, List):
+        # add to team db
+        new_members = data.member
+
+        complete_team_data.members_with_roles.extend(new_members)
+
+    # ADD MEMBER TO TEAM
+    _db_team_members = [m.model_dump() for m in complete_team_data.members_with_roles]
+    updated_team = await prisma_client.db.litellm_teamtable.update(
+        where={"team_id": data.team_id},
+        data={"members_with_roles": json.dumps(_db_team_members)},  # type: ignore
+    )

-    team_row = await prisma_client.update_data(
-        update_key_values=complete_team_data.json(exclude_none=True),
-        data=complete_team_data.json(exclude_none=True),
-        table_name="team",
-        team_id=data.team_id,
-    )
-
-    ## ADD USER, IF NEW ##
-    user_data = {  # type: ignore
-        "teams": [team_row["team_id"]],
-        "models": team_row["data"].models,
-    }
-    if new_member.user_id is not None:
-        user_data["user_id"] = new_member.user_id  # type: ignore
-        await prisma_client.update_data(
-            user_id=new_member.user_id,
-            data=user_data,
-            update_key_values_custom_query={
-                "teams": {
-                    "push": [team_row["team_id"]],
-                }
-            },
-            table_name="user",
-        )
-    elif new_member.user_email is not None:
-        user_data["user_id"] = str(uuid.uuid4())
-        user_data["user_email"] = new_member.user_email
-        ## user email is not unique acc. to prisma schema -> future improvement
-        ### for now: check if it exists in db, if not - insert it
-        existing_user_row = await prisma_client.get_data(
-            key_val={"user_email": new_member.user_email},
-            table_name="user",
-            query_type="find_all",
-        )
-        if existing_user_row is None or (
-            isinstance(existing_user_row, list) and len(existing_user_row) == 0
-        ):
-            await prisma_client.insert_data(data=user_data, table_name="user")
-
-    # Check if trying to set a budget for team member
-    if data.max_budget_in_team is not None and new_member.user_id is not None:
-        # create a new budget item for this member
-        response = await prisma_client.db.litellm_budgettable.create(
-            data={
-                "max_budget": data.max_budget_in_team,
-                "created_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
-                "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
-            }
-        )
-
-        _budget_id = response.budget_id
-        await prisma_client.db.litellm_teammembership.create(
-            data={
-                "team_id": data.team_id,
-                "user_id": new_member.user_id,
-                "budget_id": _budget_id,
-            }
-        )
-
-    return team_row
+    if isinstance(data.member, Member):
+        await add_new_member(
+            new_member=data.member,
+            max_budget_in_team=data.max_budget_in_team,
+            prisma_client=prisma_client,
+            user_api_key_dict=user_api_key_dict,
+            litellm_proxy_admin_name=litellm_proxy_admin_name,
+            team_id=data.team_id,
+        )
+    elif isinstance(data.member, List):
+        tasks: List = []
+        for m in data.member:
+            await add_new_member(
+                new_member=m,
+                max_budget_in_team=data.max_budget_in_team,
+                prisma_client=prisma_client,
+                user_api_key_dict=user_api_key_dict,
+                litellm_proxy_admin_name=litellm_proxy_admin_name,
+                team_id=data.team_id,
+            )
+        await asyncio.gather(*tasks)
+
+    return updated_team


@router.post(
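For reference, a hedged sketch of what the reworked `team_member_add` handler appears to accept now that `member` can be either a single object or a list, per the `isinstance` checks above. The endpoint path, base URL, key, and example IDs are assumptions for illustration only.

```python
# Hypothetical request body showing a list of members plus a per-member team budget.
import requests

resp = requests.post(
    "http://0.0.0.0:4000/team/member_add",  # assumed route for team_member_add
    headers={"Authorization": "Bearer sk-1234"},
    json={
        "team_id": "my-team-id",
        "max_budget_in_team": 10.0,
        "member": [  # a single {"role": ..., "user_id": ...} object should also work
            {"role": "user", "user_id": "user-1"},
            {"role": "user", "user_email": "user-2@example.com"},
        ],
    },
)
print(resp.json())
```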
@ -10279,8 +10366,10 @@ async def team_member_add(
    tags=["team management"],
    dependencies=[Depends(user_api_key_auth)],
)
+@management_endpoint_wrapper
async def team_member_delete(
    data: TeamMemberDeleteRequest,
+    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """

@ -10384,8 +10473,10 @@ async def team_member_delete(
@router.post(
    "/team/delete", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
+@management_endpoint_wrapper
async def delete_team(
    data: DeleteTeamRequest,
+    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    litellm_changed_by: Optional[str] = Header(
        None,

@ -10469,10 +10560,12 @@ async def delete_team(
@router.get(
    "/team/info", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
+@management_endpoint_wrapper
async def team_info(
+    http_request: Request,
    team_id: str = fastapi.Query(
        default=None, description="Team ID in the request parameters"
-    )
+    ),
):
    """
    get info on team + related keys

@ -10556,8 +10649,10 @@ async def team_info(
@router.post(
    "/team/block", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
+@management_endpoint_wrapper
async def block_team(
    data: BlockTeamRequest,
+    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """

@ -10578,8 +10673,10 @@ async def block_team(
@router.post(
    "/team/unblock", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
+@management_endpoint_wrapper
async def unblock_team(
    data: BlockTeamRequest,
+    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """

@ -10600,7 +10697,9 @@ async def unblock_team(
@router.get(
    "/team/list", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
+@management_endpoint_wrapper
async def list_team(
+    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """

@ -13007,7 +13106,9 @@ async def auth_callback(request: Request):
        user_role = getattr(result, generic_user_role_attribute_name, None)

    if user_id is None:
-        user_id = getattr(result, "first_name", "") + getattr(result, "last_name", "")
+        _first_name = getattr(result, "first_name", "") or ""
+        _last_name = getattr(result, "last_name", "") or ""
+        user_id = _first_name + _last_name

    user_info = None
    user_id_models: List = []

@ -91,7 +91,7 @@ model LiteLLM_TeamTable {
    updated_at DateTime @default(now()) @updatedAt @map("updated_at")
    model_spend Json @default("{}")
    model_max_budget Json @default("{}")
-    model_id Int? @unique
+    model_id Int? @unique // id for LiteLLM_ModelTable -> stores team-level model aliases
    litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
    litellm_model_table LiteLLM_ModelTable? @relation(fields: [model_id], references: [id])
}

litellm/proxy/tests/test_openai_request_with_traceparent.py  (new file, 41 lines)
@ -0,0 +1,41 @@
+# mypy: ignore-errors
+import openai
+from opentelemetry import trace
+from opentelemetry.context import Context
+from opentelemetry.trace import SpanKind
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
+from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
+
+
+trace.set_tracer_provider(TracerProvider())
+memory_exporter = InMemorySpanExporter()
+span_processor = SimpleSpanProcessor(memory_exporter)
+trace.get_tracer_provider().add_span_processor(span_processor)
+tracer = trace.get_tracer(__name__)
+
+# create an otel traceparent header
+tracer = trace.get_tracer(__name__)
+with tracer.start_as_current_span("ishaan-local-dev-app") as span:
+    span.set_attribute("generation_name", "ishaan-generation-openai-client")
+    client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
+    extra_headers = {}
+    context = trace.set_span_in_context(span)
+    traceparent = TraceContextTextMapPropagator()
+    traceparent.inject(carrier=extra_headers, context=context)
+    print("EXTRA HEADERS: ", extra_headers)
+    _trace_parent = extra_headers.get("traceparent")
+    trace_id = _trace_parent.split("-")[1]
+    print("Trace ID: ", trace_id)
+
+    # # request sent to model set on litellm proxy, `litellm --model`
+    response = client.chat.completions.create(
+        model="llama3",
+        messages=[
+            {"role": "user", "content": "this is a test request, write a short poem"}
+        ],
+        extra_headers=extra_headers,
+    )
+
+    print(response)

litellm/proxy/tests/test_simple_traceparent_openai.py  (new file, 21 lines)
@ -0,0 +1,21 @@
+# mypy: ignore-errors
+import openai
+import uuid
+
+client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
+example_traceparent = f"00-80e1afed08e019fc1110464cfa66635c-02e80198930058d4-01"
+extra_headers = {"traceparent": example_traceparent}
+_trace_id = example_traceparent.split("-")[1]
+
+print("EXTRA HEADERS: ", extra_headers)
+print("Trace ID: ", _trace_id)
+
+response = client.chat.completions.create(
+    model="llama3",
+    messages=[
+        {"role": "user", "content": "this is a test request, write a short poem"}
+    ],
+    extra_headers=extra_headers,
+)
+
+print(response)

@ -48,6 +48,7 @@ from datetime import datetime, timedelta
from litellm.integrations.slack_alerting import SlackAlerting
from typing_extensions import overload
from functools import wraps
+from fastapi import Request

if TYPE_CHECKING:
    from opentelemetry.trace import Span as _Span

@ -2017,6 +2018,7 @@ def get_logging_payload(
        user_api_key_team_id=None,
        user_api_key_user_id=None,
        user_api_key_team_alias=None,
+        spend_logs_metadata=None,
    )
    if isinstance(metadata, dict):
        verbose_proxy_logger.debug(

@ -2595,36 +2597,6 @@ async def update_spend(
        raise e


-async def _read_request_body(request):
-    """
-    Asynchronous function to read the request body and parse it as JSON or literal data.
-
-    Parameters:
-    - request: The request object to read the body from
-
-    Returns:
-    - dict: Parsed request data as a dictionary
-    """
-    import ast, json
-
-    try:
-        request_data = {}
-        if request is None:
-            return request_data
-        body = await request.body()
-
-        if body == b"" or body is None:
-            return request_data
-        body_str = body.decode()
-        try:
-            request_data = ast.literal_eval(body_str)
-        except:
-            request_data = json.loads(body_str)
-        return request_data
-    except:
-        return {}
-
-
def _is_projected_spend_over_limit(
    current_spend: float, soft_budget_limit: Optional[float]
):
@ -2057,11 +2057,14 @@ class Router:
            generic_fallback_idx: Optional[int] = None
            ## check for specific model group-specific fallbacks
            for idx, item in enumerate(fallbacks):
-                if list(item.keys())[0] == model_group:
-                    fallback_model_group = item[model_group]
-                    break
-                elif list(item.keys())[0] == "*":
-                    generic_fallback_idx = idx
+                if isinstance(item, dict):
+                    if list(item.keys())[0] == model_group:
+                        fallback_model_group = item[model_group]
+                        break
+                    elif list(item.keys())[0] == "*":
+                        generic_fallback_idx = idx
+                elif isinstance(item, str):
+                    fallback_model_group = [fallbacks.pop(idx)]
            ## if none, check for generic fallback
            if (
                fallback_model_group is None

@ -2310,13 +2313,15 @@ class Router:
            verbose_router_logger.debug(f"inside model fallbacks: {fallbacks}")
            fallback_model_group = None
            generic_fallback_idx: Optional[int] = None
-            ## check for specific model group-specific fallbacks
            for idx, item in enumerate(fallbacks):
-                if list(item.keys())[0] == model_group:
-                    fallback_model_group = item[model_group]
-                    break
-                elif list(item.keys())[0] == "*":
-                    generic_fallback_idx = idx
+                if isinstance(item, dict):
+                    if list(item.keys())[0] == model_group:
+                        fallback_model_group = item[model_group]
+                        break
+                    elif list(item.keys())[0] == "*":
+                        generic_fallback_idx = idx
+                elif isinstance(item, str):
+                    fallback_model_group = [fallbacks.pop(idx)]
            ## if none, check for generic fallback
            if (
                fallback_model_group is None
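The `isinstance(item, str)` branch added in both hunks above appears to let per-request fallbacks be passed as a plain list of deployment names rather than `{"model": ["fallback"]}` dicts. A hedged sketch of that usage follows; the model names and keys are placeholders, not values from this diff.

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "bad-model",
            "litellm_params": {"model": "openai/my-bad-model", "api_key": "bad-key"},
        },
        {
            "model_name": "my-good-model",
            "litellm_params": {"model": "gpt-4o"},
        },
    ]
)

# "my-good-model" is picked up by the new string branch and used as the fallback group
response = router.completion(
    model="bad-model",
    messages=[{"role": "user", "content": "hello"}],
    fallbacks=["my-good-model"],
)
```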
@ -810,6 +810,28 @@ def test_vertexai_embedding():
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def test_vertexai_embedding_embedding_latest():
|
||||||
|
try:
|
||||||
|
load_vertex_ai_credentials()
|
||||||
|
litellm.set_verbose = True
|
||||||
|
|
||||||
|
response = embedding(
|
||||||
|
model="vertex_ai/text-embedding-004",
|
||||||
|
input=["hi"],
|
||||||
|
dimensions=1,
|
||||||
|
auto_truncate=True,
|
||||||
|
task_type="RETRIEVAL_QUERY",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert len(response.data[0]["embedding"]) == 1
|
||||||
|
assert response.usage.prompt_tokens > 0
|
||||||
|
print(f"response:", response)
|
||||||
|
except litellm.RateLimitError as e:
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_vertexai_aembedding():
|
async def test_vertexai_aembedding():
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -220,13 +220,13 @@ def test_completion_bedrock_claude_sts_oidc_auth():
|
||||||
aws_web_identity_token = "oidc/circleci_v2/"
|
aws_web_identity_token = "oidc/circleci_v2/"
|
||||||
aws_region_name = os.environ["AWS_REGION_NAME"]
|
aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||||
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
|
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
|
||||||
# TODO: This is using David's IAM role, we should use Litellm's IAM role eventually
|
# TODO: This is using ai.moda's IAM role, we should use LiteLLM's IAM role eventually
|
||||||
aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci"
|
aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
|
|
||||||
response = completion(
|
response_1 = completion(
|
||||||
model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
|
model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
|
||||||
messages=messages,
|
messages=messages,
|
||||||
max_tokens=10,
|
max_tokens=10,
|
||||||
|
@ -236,8 +236,40 @@ def test_completion_bedrock_claude_sts_oidc_auth():
|
||||||
aws_role_name=aws_role_name,
|
aws_role_name=aws_role_name,
|
||||||
aws_session_name="my-test-session",
|
aws_session_name="my-test-session",
|
||||||
)
|
)
|
||||||
# Add any assertions here to check the response
|
print(response_1)
|
||||||
print(response)
|
assert len(response_1.choices) > 0
|
||||||
|
assert len(response_1.choices[0].message.content) > 0
|
||||||
|
|
||||||
|
# This second call is to verify that the cache isn't breaking anything
|
||||||
|
response_2 = completion(
|
||||||
|
model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
|
||||||
|
messages=messages,
|
||||||
|
max_tokens=5,
|
||||||
|
temperature=0.2,
|
||||||
|
aws_region_name=aws_region_name,
|
||||||
|
aws_web_identity_token=aws_web_identity_token,
|
||||||
|
aws_role_name=aws_role_name,
|
||||||
|
aws_session_name="my-test-session",
|
||||||
|
)
|
||||||
|
print(response_2)
|
||||||
|
assert len(response_2.choices) > 0
|
||||||
|
assert len(response_2.choices[0].message.content) > 0
|
||||||
|
|
||||||
|
# This third call is to verify that the cache isn't used for a different region
|
||||||
|
response_3 = completion(
|
||||||
|
model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
|
||||||
|
messages=messages,
|
||||||
|
max_tokens=6,
|
||||||
|
temperature=0.3,
|
||||||
|
aws_region_name="us-east-1",
|
||||||
|
aws_web_identity_token=aws_web_identity_token,
|
||||||
|
aws_role_name=aws_role_name,
|
||||||
|
aws_session_name="my-test-session",
|
||||||
|
)
|
||||||
|
print(response_3)
|
||||||
|
assert len(response_3.choices) > 0
|
||||||
|
assert len(response_3.choices[0].message.content) > 0
|
||||||
|
|
||||||
except RateLimitError:
|
except RateLimitError:
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -255,7 +287,7 @@ def test_completion_bedrock_httpx_command_r_sts_oidc_auth():
|
||||||
aws_web_identity_token = "oidc/circleci_v2/"
|
aws_web_identity_token = "oidc/circleci_v2/"
|
||||||
aws_region_name = os.environ["AWS_REGION_NAME"]
|
aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||||
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
|
# aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
|
||||||
# TODO: This is using David's IAM role, we should use Litellm's IAM role eventually
|
# TODO: This is using ai.moda's IAM role, we should use LiteLLM's IAM role eventually
|
||||||
aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci"
|
aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -16,7 +16,7 @@ from litellm.llms.prompt_templates.factory import anthropic_messages_pt
|
||||||
from unittest.mock import patch, MagicMock
|
from unittest.mock import patch, MagicMock
|
||||||
from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
|
from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
|
||||||
|
|
||||||
# litellm.num_retries=3
|
# litellm.num_retries = 3
|
||||||
litellm.cache = None
|
litellm.cache = None
|
||||||
litellm.success_callback = []
|
litellm.success_callback = []
|
||||||
user_message = "Write a short poem about the sky"
|
user_message = "Write a short poem about the sky"
|
||||||
|
@ -114,6 +114,27 @@ def test_null_role_response():
|
||||||
assert response.choices[0].message.role == "assistant"
|
assert response.choices[0].message.role == "assistant"
|
||||||
|
|
||||||
|
|
||||||
|
def test_completion_azure_ai_command_r():
|
||||||
|
try:
|
||||||
|
import os
|
||||||
|
|
||||||
|
litellm.set_verbose = True
|
||||||
|
|
||||||
|
os.environ["AZURE_AI_API_BASE"] = os.getenv("AZURE_COHERE_API_BASE", "")
|
||||||
|
os.environ["AZURE_AI_API_KEY"] = os.getenv("AZURE_COHERE_API_KEY", "")
|
||||||
|
|
||||||
|
response: litellm.ModelResponse = completion(
|
||||||
|
model="azure_ai/command-r-plus",
|
||||||
|
messages=[{"role": "user", "content": "What is the meaning of life?"}],
|
||||||
|
) # type: ignore
|
||||||
|
|
||||||
|
assert "azure_ai" in response.model
|
||||||
|
except litellm.Timeout as e:
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
def test_completion_azure_command_r():
|
def test_completion_azure_command_r():
|
||||||
try:
|
try:
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
|
@ -530,6 +551,7 @@ def test_completion_cohere_command_r_plus_function_call():
|
||||||
messages=messages,
|
messages=messages,
|
||||||
tools=tools,
|
tools=tools,
|
||||||
tool_choice="auto",
|
tool_choice="auto",
|
||||||
|
force_single_step=True,
|
||||||
)
|
)
|
||||||
print(second_response)
|
print(second_response)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -720,7 +742,11 @@ def test_completion_claude_3_function_plus_image():
|
||||||
print(response)
|
print(response)
|
||||||
|
|
||||||
|
|
||||||
def test_completion_azure_mistral_large_function_calling():
|
@pytest.mark.parametrize(
|
||||||
|
"provider",
|
||||||
|
["azure", "azure_ai"],
|
||||||
|
)
|
||||||
|
def test_completion_azure_mistral_large_function_calling(provider):
|
||||||
"""
|
"""
|
||||||
This primarily tests if the 'Function()' pydantic object correctly handles argument param passed in as a dict vs. string
|
This primarily tests if the 'Function()' pydantic object correctly handles argument param passed in as a dict vs. string
|
||||||
"""
|
"""
|
||||||
|
@ -751,8 +777,9 @@ def test_completion_azure_mistral_large_function_calling():
|
||||||
"content": "What's the weather like in Boston today in Fahrenheit?",
|
"content": "What's the weather like in Boston today in Fahrenheit?",
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
response = completion(
|
response = completion(
|
||||||
model="azure/mistral-large-latest",
|
model="{}/mistral-large-latest".format(provider),
|
||||||
api_base=os.getenv("AZURE_MISTRAL_API_BASE"),
|
api_base=os.getenv("AZURE_MISTRAL_API_BASE"),
|
||||||
api_key=os.getenv("AZURE_MISTRAL_API_KEY"),
|
api_key=os.getenv("AZURE_MISTRAL_API_KEY"),
|
||||||
messages=messages,
|
messages=messages,
|
||||||
|
|
|
@ -34,14 +34,15 @@ class MyCustomHandler(CustomLogger):
|
||||||
self.response_cost = 0
|
self.response_cost = 0
|
||||||
|
|
||||||
def log_pre_api_call(self, model, messages, kwargs):
|
def log_pre_api_call(self, model, messages, kwargs):
|
||||||
print(f"Pre-API Call")
|
print("Pre-API Call")
|
||||||
|
traceback.print_stack()
|
||||||
self.data_sent_to_api = kwargs["additional_args"].get("complete_input_dict", {})
|
self.data_sent_to_api = kwargs["additional_args"].get("complete_input_dict", {})
|
||||||
|
|
||||||
def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
|
def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
|
||||||
print(f"Post-API Call")
|
print("Post-API Call")
|
||||||
|
|
||||||
def log_stream_event(self, kwargs, response_obj, start_time, end_time):
|
def log_stream_event(self, kwargs, response_obj, start_time, end_time):
|
||||||
print(f"On Stream")
|
print("On Stream")
|
||||||
|
|
||||||
def log_success_event(self, kwargs, response_obj, start_time, end_time):
|
def log_success_event(self, kwargs, response_obj, start_time, end_time):
|
||||||
print(f"On Success")
|
print(f"On Success")
|
||||||
|
@ -372,6 +373,7 @@ async def test_async_custom_handler_embedding_optional_param():
|
||||||
Tests if the openai optional params for embedding - user + encoding_format,
|
Tests if the openai optional params for embedding - user + encoding_format,
|
||||||
are logged
|
are logged
|
||||||
"""
|
"""
|
||||||
|
litellm.set_verbose = True
|
||||||
customHandler_optional_params = MyCustomHandler()
|
customHandler_optional_params = MyCustomHandler()
|
||||||
litellm.callbacks = [customHandler_optional_params]
|
litellm.callbacks = [customHandler_optional_params]
|
||||||
response = await litellm.aembedding(
|
response = await litellm.aembedding(
|
||||||
|
|
|
@ -55,8 +55,12 @@ async def test_content_policy_exception_azure():
|
||||||
except litellm.ContentPolicyViolationError as e:
|
except litellm.ContentPolicyViolationError as e:
|
||||||
print("caught a content policy violation error! Passed")
|
print("caught a content policy violation error! Passed")
|
||||||
print("exception", e)
|
print("exception", e)
|
||||||
|
assert e.litellm_debug_info is not None
|
||||||
|
assert isinstance(e.litellm_debug_info, str)
|
||||||
|
assert len(e.litellm_debug_info) > 0
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
print()
|
||||||
pytest.fail(f"An exception occurred - {str(e)}")
|
pytest.fail(f"An exception occurred - {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -195,6 +195,8 @@ async def test_aimage_generation_vertex_ai():
|
||||||
assert isinstance(d, litellm.ImageObject)
|
assert isinstance(d, litellm.ImageObject)
|
||||||
print("data in response.data", d)
|
print("data in response.data", d)
|
||||||
assert d.b64_json is not None
|
assert d.b64_json is not None
|
||||||
|
except litellm.ServiceUnavailableError as e:
|
||||||
|
pass
|
||||||
except litellm.RateLimitError as e:
|
except litellm.RateLimitError as e:
|
||||||
pass
|
pass
|
||||||
except litellm.ContentPolicyViolationError:
|
except litellm.ContentPolicyViolationError:
|
||||||
|
|
|
@ -16,6 +16,7 @@ from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLMRoutes
|
||||||
from litellm.proxy.auth.handle_jwt import JWTHandler
|
from litellm.proxy.auth.handle_jwt import JWTHandler
|
||||||
from litellm.caching import DualCache
|
from litellm.caching import DualCache
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
from fastapi import Request
|
||||||
|
|
||||||
public_key = {
|
public_key = {
|
||||||
"kty": "RSA",
|
"kty": "RSA",
|
||||||
|
@ -346,6 +347,7 @@ async def test_team_token_output(prisma_client, audience):
|
||||||
models=["gpt-3.5-turbo", "gpt-4"],
|
models=["gpt-3.5-turbo", "gpt-4"],
|
||||||
),
|
),
|
||||||
user_api_key_dict=result,
|
user_api_key_dict=result,
|
||||||
|
http_request=Request(scope={"type": "http"}),
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"This should not fail - {str(e)}")
|
pytest.fail(f"This should not fail - {str(e)}")
|
||||||
|
@ -534,6 +536,7 @@ async def test_user_token_output(
|
||||||
models=["gpt-3.5-turbo", "gpt-4"],
|
models=["gpt-3.5-turbo", "gpt-4"],
|
||||||
),
|
),
|
||||||
user_api_key_dict=result,
|
user_api_key_dict=result,
|
||||||
|
http_request=Request(scope={"type": "http"}),
|
||||||
)
|
)
|
||||||
if default_team_id:
|
if default_team_id:
|
||||||
await new_team(
|
await new_team(
|
||||||
|
@ -544,6 +547,7 @@ async def test_user_token_output(
|
||||||
models=["gpt-3.5-turbo", "gpt-4"],
|
models=["gpt-3.5-turbo", "gpt-4"],
|
||||||
),
|
),
|
||||||
user_api_key_dict=result,
|
user_api_key_dict=result,
|
||||||
|
http_request=Request(scope={"type": "http"}),
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"This should not fail - {str(e)}")
|
pytest.fail(f"This should not fail - {str(e)}")
|
||||||
|
|
|
@ -137,6 +137,7 @@ async def test_new_user_response(prisma_client):
|
||||||
NewTeamRequest(
|
NewTeamRequest(
|
||||||
team_id=_team_id,
|
team_id=_team_id,
|
||||||
),
|
),
|
||||||
|
http_request=Request(scope={"type": "http"}),
|
||||||
user_api_key_dict=UserAPIKeyAuth(
|
user_api_key_dict=UserAPIKeyAuth(
|
||||||
user_role=LitellmUserRoles.PROXY_ADMIN,
|
user_role=LitellmUserRoles.PROXY_ADMIN,
|
||||||
api_key="sk-1234",
|
api_key="sk-1234",
|
||||||
|
@ -272,7 +273,7 @@ def test_call_with_invalid_key(prisma_client):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Got Exception", e)
|
print("Got Exception", e)
|
||||||
print(e.message)
|
print(e.message)
|
||||||
assert "Authentication Error, Invalid token passed" in e.message
|
assert "Authentication Error, Invalid proxy server token passed" in e.message
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@ -368,6 +369,7 @@ async def test_call_with_valid_model_using_all_models(prisma_client):
|
||||||
new_team_response = await new_team(
|
new_team_response = await new_team(
|
||||||
data=team_request,
|
data=team_request,
|
||||||
user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
|
user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
|
||||||
|
http_request=Request(scope={"type": "http"}),
|
||||||
)
|
)
|
||||||
print("new_team_response", new_team_response)
|
print("new_team_response", new_team_response)
|
||||||
created_team_id = new_team_response["team_id"]
|
created_team_id = new_team_response["team_id"]
|
||||||
|
@ -471,7 +473,7 @@ def test_call_with_user_over_budget(prisma_client):
|
||||||
asyncio.run(test())
|
asyncio.run(test())
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_detail = e.message
|
error_detail = e.message
|
||||||
assert "Authentication Error, ExceededBudget:" in error_detail
|
assert "Budget has been exceeded" in error_detail
|
||||||
print(vars(e))
|
print(vars(e))
|
||||||
|
|
||||||
|
|
||||||
|
@ -652,7 +654,7 @@ def test_call_with_proxy_over_budget(prisma_client):
|
||||||
error_detail = e.message
|
error_detail = e.message
|
||||||
else:
|
else:
|
||||||
error_detail = traceback.format_exc()
|
error_detail = traceback.format_exc()
|
||||||
assert "Authentication Error, ExceededBudget:" in error_detail
|
assert "Budget has been exceeded" in error_detail
|
||||||
print(vars(e))
|
print(vars(e))
|
||||||
|
|
||||||
|
|
||||||
|
@ -730,7 +732,7 @@ def test_call_with_user_over_budget_stream(prisma_client):
|
||||||
asyncio.run(test())
|
asyncio.run(test())
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_detail = e.message
|
error_detail = e.message
|
||||||
assert "Authentication Error, ExceededBudget:" in error_detail
|
assert "Budget has been exceeded" in error_detail
|
||||||
print(vars(e))
|
print(vars(e))
|
||||||
|
|
||||||
|
|
||||||
|
@ -827,7 +829,7 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
|
||||||
asyncio.run(test())
|
asyncio.run(test())
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_detail = e.message
|
error_detail = e.message
|
||||||
assert "Authentication Error, ExceededBudget:" in error_detail
|
assert "Budget has been exceeded" in error_detail
|
||||||
print(vars(e))
|
print(vars(e))
|
||||||
|
|
||||||
|
|
||||||
|
@ -1086,6 +1088,7 @@ def test_generate_and_update_key(prisma_client):
|
||||||
api_key="sk-1234",
|
api_key="sk-1234",
|
||||||
user_id="1234",
|
user_id="1234",
|
||||||
),
|
),
|
||||||
|
http_request=Request(scope={"type": "http"}),
|
||||||
)
|
)
|
||||||
|
|
||||||
_team_2 = "ishaan-special-team_{}".format(uuid.uuid4())
|
_team_2 = "ishaan-special-team_{}".format(uuid.uuid4())
|
||||||
|
@ -1098,6 +1101,7 @@ def test_generate_and_update_key(prisma_client):
|
||||||
api_key="sk-1234",
|
api_key="sk-1234",
|
||||||
user_id="1234",
|
user_id="1234",
|
||||||
),
|
),
|
||||||
|
http_request=Request(scope={"type": "http"}),
|
||||||
)
|
)
|
||||||
|
|
||||||
request = NewUserRequest(
|
request = NewUserRequest(
|
||||||
|
@ -1175,7 +1179,6 @@ def test_generate_and_update_key(prisma_client):
|
||||||
asyncio.run(test())
|
asyncio.run(test())
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Got Exception", e)
|
print("Got Exception", e)
|
||||||
print(e.message)
|
|
||||||
pytest.fail(f"An exception occurred - {str(e)}")
|
pytest.fail(f"An exception occurred - {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
@ -1363,7 +1366,7 @@ def test_call_with_key_over_budget(prisma_client):
|
||||||
error_detail = e.message
|
error_detail = e.message
|
||||||
else:
|
else:
|
||||||
error_detail = str(e)
|
error_detail = str(e)
|
||||||
assert "Authentication Error, ExceededTokenBudget:" in error_detail
|
assert "Budget has been exceeded" in error_detail
|
||||||
print(vars(e))
|
print(vars(e))
|
||||||
|
|
||||||
|
|
||||||
|
@ -1477,7 +1480,7 @@ def test_call_with_key_over_model_budget(prisma_client):
|
||||||
# print(f"Error - {str(e)}")
|
# print(f"Error - {str(e)}")
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
error_detail = e.message
|
error_detail = e.message
|
||||||
assert "Authentication Error, ExceededModelBudget:" in error_detail
|
assert "Budget has been exceeded!" in error_detail
|
||||||
print(vars(e))
|
print(vars(e))
|
||||||
|
|
||||||
|
|
||||||
|
@ -1638,7 +1641,7 @@ async def test_call_with_key_over_budget_stream(prisma_client):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Got Exception", e)
|
print("Got Exception", e)
|
||||||
error_detail = e.message
|
error_detail = e.message
|
||||||
assert "Authentication Error, ExceededTokenBudget:" in error_detail
|
assert "Budget has been exceeded" in error_detail
|
||||||
print(vars(e))
|
print(vars(e))
|
||||||
|
|
||||||
|
|
||||||
|
@ -2051,6 +2054,7 @@ async def test_master_key_hashing(prisma_client):
|
||||||
api_key="sk-1234",
|
api_key="sk-1234",
|
||||||
user_id="1234",
|
user_id="1234",
|
||||||
),
|
),
|
||||||
|
http_request=Request(scope={"type": "http"}),
|
||||||
)
|
)
|
||||||
|
|
||||||
_response = await new_user(
|
_response = await new_user(
|
||||||
|
@ -2184,6 +2188,7 @@ async def test_create_update_team(prisma_client):
|
||||||
tpm_limit=20,
|
tpm_limit=20,
|
||||||
rpm_limit=20,
|
rpm_limit=20,
|
||||||
),
|
),
|
||||||
|
http_request=Request(scope={"type": "http"}),
|
||||||
user_api_key_dict=UserAPIKeyAuth(
|
user_api_key_dict=UserAPIKeyAuth(
|
||||||
user_role=LitellmUserRoles.PROXY_ADMIN,
|
user_role=LitellmUserRoles.PROXY_ADMIN,
|
||||||
api_key="sk-1234",
|
api_key="sk-1234",
|
||||||
|
@ -2233,7 +2238,10 @@ async def test_create_update_team(prisma_client):
|
||||||
)
|
)
|
||||||
|
|
||||||
# now hit team_info
|
# now hit team_info
|
||||||
response = await team_info(team_id=_team_id)
|
response = await team_info(
|
||||||
|
team_id=_team_id,
|
||||||
|
http_request=Request(scope={"type": "http"}),
|
||||||
|
)
|
||||||
|
|
||||||
print("RESPONSE from team_info", response)
|
print("RESPONSE from team_info", response)
|
||||||
|
|
||||||
|
|
|
@ -1059,3 +1059,53 @@ async def test_default_model_fallbacks(sync_mode, litellm_module_fallbacks):
|
||||||
|
|
||||||
assert isinstance(response, litellm.ModelResponse)
|
assert isinstance(response, litellm.ModelResponse)
|
||||||
assert response.model is not None and response.model == "gpt-4o"
|
assert response.model is not None and response.model == "gpt-4o"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_client_side_fallbacks_list(sync_mode):
|
||||||
|
"""
|
||||||
|
|
||||||
|
Tests Client Side Fallbacks
|
||||||
|
|
||||||
|
User can pass "fallbacks": ["gpt-3.5-turbo"] and this should work
|
||||||
|
|
||||||
|
"""
|
||||||
|
router = Router(
|
||||||
|
model_list=[
|
||||||
|
{
|
||||||
|
"model_name": "bad-model",
|
||||||
|
"litellm_params": {
|
||||||
|
"model": "openai/my-bad-model",
|
||||||
|
"api_key": "my-bad-api-key",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "my-good-model",
|
||||||
|
"litellm_params": {
|
||||||
|
"model": "gpt-4o",
|
||||||
|
"api_key": os.getenv("OPENAI_API_KEY"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
if sync_mode:
|
||||||
|
response = router.completion(
|
||||||
|
model="bad-model",
|
||||||
|
messages=[{"role": "user", "content": "Hey, how's it going?"}],
|
||||||
|
fallbacks=["my-good-model"],
|
||||||
|
mock_testing_fallbacks=True,
|
||||||
|
mock_response="Hey! nice day",
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
response = await router.acompletion(
|
||||||
|
model="bad-model",
|
||||||
|
messages=[{"role": "user", "content": "Hey, how's it going?"}],
|
||||||
|
fallbacks=["my-good-model"],
|
||||||
|
mock_testing_fallbacks=True,
|
||||||
|
mock_response="Hey! nice day",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert isinstance(response, litellm.ModelResponse)
|
||||||
|
assert response.model is not None and response.model == "gpt-4o"
|
||||||
|
|
|
@ -1463,6 +1463,10 @@ async def test_parallel_streaming_requests(sync_mode, model):
|
||||||
|
|
||||||
except RateLimitError:
|
except RateLimitError:
|
||||||
pass
|
pass
|
||||||
|
except litellm.InternalServerError as e:
|
||||||
|
if "predibase" in str(e).lower():
|
||||||
|
# only skip internal server error from predibase - their endpoint seems quite unstable
|
||||||
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
@ -2535,7 +2539,10 @@ def streaming_and_function_calling_format_tests(idx, chunk):
|
||||||
return extracted_chunk, finished
|
return extracted_chunk, finished
|
||||||
|
|
||||||
|
|
||||||
def test_openai_streaming_and_function_calling():
|
@pytest.mark.parametrize(
|
||||||
|
"model", ["gpt-3.5-turbo", "anthropic.claude-3-sonnet-20240229-v1:0"]
|
||||||
|
)
|
||||||
|
def test_streaming_and_function_calling(model):
|
||||||
tools = [
|
tools = [
|
||||||
{
|
{
|
||||||
"type": "function",
|
"type": "function",
|
||||||
|
@ -2556,16 +2563,21 @@ def test_openai_streaming_and_function_calling():
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
messages = [{"role": "user", "content": "What is the weather like in Boston?"}]
|
messages = [{"role": "user", "content": "What is the weather like in Boston?"}]
|
||||||
try:
|
try:
|
||||||
response = completion(
|
litellm.set_verbose = True
|
||||||
model="gpt-3.5-turbo",
|
response: litellm.CustomStreamWrapper = completion(
|
||||||
|
model=model,
|
||||||
tools=tools,
|
tools=tools,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
tool_choice="required",
|
||||||
|
) # type: ignore
|
||||||
# Add any assertions here to check the response
|
# Add any assertions here to check the response
|
||||||
for idx, chunk in enumerate(response):
|
for idx, chunk in enumerate(response):
|
||||||
|
# continue
|
||||||
|
print("\n{}\n".format(chunk))
|
||||||
if idx == 0:
|
if idx == 0:
|
||||||
assert (
|
assert (
|
||||||
chunk.choices[0].delta.tool_calls[0].function.arguments is not None
|
chunk.choices[0].delta.tool_calls[0].function.arguments is not None
|
||||||
|
@ -2573,6 +2585,7 @@ def test_openai_streaming_and_function_calling():
|
||||||
assert isinstance(
|
assert isinstance(
|
||||||
chunk.choices[0].delta.tool_calls[0].function.arguments, str
|
chunk.choices[0].delta.tool_calls[0].function.arguments, str
|
||||||
)
|
)
|
||||||
|
# assert False
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
raise e
|
raise e
|
||||||
|
|
|
@ -3990,6 +3990,7 @@ def test_async_text_completion():
|
||||||
asyncio.run(test_get_response())
|
asyncio.run(test_get_response())
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="Tgai endpoints are unstable")
|
||||||
def test_async_text_completion_together_ai():
|
def test_async_text_completion_together_ai():
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
print("test_async_text_completion")
|
print("test_async_text_completion")
|
||||||
|
|
|
@@ -187,12 +187,43 @@ def test_load_test_token_counter(model):
     print("model={}, total test time={}".format(model, total_time))
     assert total_time < 10, f"Total encoding time > 10s, {total_time}"
 
 
 def test_openai_token_with_image_and_text():
     model = "gpt-4o"
-    full_request = {'model': 'gpt-4o', 'tools': [{'type': 'function', 'function': {'name': 'json', 'parameters': {'type': 'object', 'required': ['clause'], 'properties': {'clause': {'type': 'string'}}}, 'description': 'Respond with a JSON object.'}}], 'logprobs': False, 'messages': [{'role': 'user', 'content': [{'text': '\n Just some long text, long long text, and you know it will be longer than 7 tokens definetly.', 'type': 'text'}]}], 'tool_choice': {'type': 'function', 'function': {'name': 'json'}}, 'exclude_models': [], 'disable_fallback': False, 'exclude_providers': []}
+    full_request = {
+        "model": "gpt-4o",
+        "tools": [
+            {
+                "type": "function",
+                "function": {
+                    "name": "json",
+                    "parameters": {
+                        "type": "object",
+                        "required": ["clause"],
+                        "properties": {"clause": {"type": "string"}},
+                    },
+                    "description": "Respond with a JSON object.",
+                },
+            }
+        ],
+        "logprobs": False,
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "text": "\n Just some long text, long long text, and you know it will be longer than 7 tokens definetly.",
+                        "type": "text",
+                    }
+                ],
+            }
+        ],
+        "tool_choice": {"type": "function", "function": {"name": "json"}},
+        "exclude_models": [],
+        "disable_fallback": False,
+        "exclude_providers": [],
+    }
     messages = full_request.get("messages", [])
 
     token_count = token_counter(model=model, messages=messages)
     print(token_count)
 
-test_openai_token_with_image_and_text()
@@ -23,6 +23,7 @@ from litellm.utils import (
     create_pretrained_tokenizer,
     create_tokenizer,
     get_max_tokens,
+    get_supported_openai_params,
 )
 
 # Assuming your trim_messages, shorten_message_to_fit_limit, and get_token_count functions are all in a module named 'message_utils'

@@ -386,3 +387,11 @@ def test_get_max_token_unit_test():
     ) # Returns a number instead of throwing an Exception
 
     assert isinstance(max_tokens, int)
+
+
+def test_get_supported_openai_params() -> None:
+    # Mapped provider
+    assert isinstance(get_supported_openai_params("gpt-4"), list)
+
+    # Unmapped provider
+    assert get_supported_openai_params("nonexistent") is None
@@ -1,5 +1,6 @@
 from typing import TypedDict, Any, Union, Optional, Literal, List
 import json
+from .openai import ChatCompletionToolCallChunk
 from typing_extensions import (
     Self,
     Protocol,

@@ -118,6 +119,15 @@ class ToolBlockDeltaEvent(TypedDict):
     input: str
 
 
+class ToolUseBlockStartEvent(TypedDict):
+    name: str
+    toolUseId: str
+
+
+class ContentBlockStartEvent(TypedDict, total=False):
+    toolUse: Optional[ToolUseBlockStartEvent]
+
+
 class ContentBlockDeltaEvent(TypedDict, total=False):
     """
     Either 'text' or 'toolUse' will be specified for Converse API streaming response.

@@ -138,10 +148,11 @@ class RequestObject(TypedDict, total=False):
 
 class GenericStreamingChunk(TypedDict):
     text: Required[str]
-    tool_str: Required[str]
+    tool_use: Optional[ChatCompletionToolCallChunk]
     is_finished: Required[bool]
     finish_reason: Required[str]
     usage: Optional[ConverseTokenUsageBlock]
+    index: int
 
 
 class Document(TypedDict):
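For reference, a hypothetical chunk built against the updated TypedDicts above — `tool_str` replaced by an optional OpenAI-style `tool_use` entry plus an `index`. The field values are made up; only the shapes come from the definitions in this diff.

```python
from litellm.types.llms.bedrock import GenericStreamingChunk
from litellm.types.llms.openai import ChatCompletionToolCallChunk

# A streaming chunk carrying one tool-call fragment and no text (illustrative values).
chunk: GenericStreamingChunk = {
    "text": "",
    "tool_use": ChatCompletionToolCallChunk(
        id="tooluse_123",  # hypothetical id
        type="function",
        function={"name": "get_current_weather", "arguments": '{"location": "Boston"}'},
    ),
    "is_finished": False,
    "finish_reason": "",
    "usage": None,
    "index": 0,
}
```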
@@ -296,14 +296,27 @@ class ListBatchRequest(TypedDict, total=False):
 
 
 class ChatCompletionToolCallFunctionChunk(TypedDict):
-    name: str
+    name: Optional[str]
     arguments: str
 
 
 class ChatCompletionToolCallChunk(TypedDict):
+    id: Optional[str]
+    type: Literal["function"]
+    function: ChatCompletionToolCallFunctionChunk
+
+
+class ChatCompletionDeltaToolCallChunk(TypedDict):
     id: str
     type: Literal["function"]
     function: ChatCompletionToolCallFunctionChunk
+    index: int
+
+
+class ChatCompletionDeltaChunk(TypedDict, total=False):
+    content: Optional[str]
+    tool_calls: List[ChatCompletionDeltaToolCallChunk]
+    role: str
 
 
 class ChatCompletionResponseMessage(TypedDict, total=False):
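A small sketch of how the new delta types compose: a provider-agnostic delta carrying one indexed tool-call fragment. Values are illustrative only; the shapes are exactly the TypedDicts added above.

```python
from litellm.types.llms.openai import (
    ChatCompletionDeltaChunk,
    ChatCompletionDeltaToolCallChunk,
)

delta: ChatCompletionDeltaChunk = {
    "role": "assistant",
    "content": None,
    "tool_calls": [
        ChatCompletionDeltaToolCallChunk(
            id="call_abc",  # hypothetical id
            type="function",
            function={"name": "json", "arguments": '{"clause": "force majeure"}'},
            index=0,
        )
    ],
}
```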
182  litellm/utils.py
@@ -30,7 +30,7 @@ from dataclasses import (
     dataclass,
     field,
 )
-
+import os
 import litellm._service_logger  # for storing API inputs, outputs, and metadata
 from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
 from litellm.caching import DualCache

@@ -49,9 +49,9 @@ except (ImportError, AttributeError):
 
     filename = pkg_resources.resource_filename(__name__, "llms/tokenizers")
 
-    os.environ["TIKTOKEN_CACHE_DIR"] = (
-        filename  # use local copy of tiktoken b/c of - https://github.com/BerriAI/litellm/issues/1071
-    )
+    os.environ["TIKTOKEN_CACHE_DIR"] = os.getenv(
+        "CUSTOM_TIKTOKEN_CACHE_DIR", filename
+    )  # use local copy of tiktoken b/c of - https://github.com/BerriAI/litellm/issues/1071
 
 encoding = tiktoken.get_encoding("cl100k_base")
 from importlib import resources

@@ -63,6 +63,11 @@ claude_json_str = json.dumps(json_data)
 import importlib.metadata
 from ._logging import verbose_logger
 from .types.router import LiteLLM_Params
+from .types.llms.openai import (
+    ChatCompletionToolCallChunk,
+    ChatCompletionToolCallFunctionChunk,
+    ChatCompletionDeltaToolCallChunk,
+)
 from .integrations.traceloop import TraceloopLogger
 from .integrations.athina import AthinaLogger
 from .integrations.helicone import HeliconeLogger

@@ -933,7 +938,6 @@ class TextCompletionResponse(OpenAIObject):
         object=None,
         **params,
     ):
-
         if stream:
             object = "text_completion.chunk"
             choices = [TextChoices()]

@@ -942,7 +946,6 @@ class TextCompletionResponse(OpenAIObject):
         if choices is not None and isinstance(choices, list):
             new_choices = []
             for choice in choices:
-
                 if isinstance(choice, TextChoices):
                     _new_choice = choice
                 elif isinstance(choice, dict):

@@ -1018,7 +1021,6 @@ class ImageObject(OpenAIObject):
     revised_prompt: Optional[str] = None
 
     def __init__(self, b64_json=None, url=None, revised_prompt=None):
-
         super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt)
 
     def __contains__(self, key):

@@ -1342,28 +1344,29 @@ class Logging:
             )
         else:
             verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n")
-        # log raw request to provider (like LangFuse)
-        try:
-            # [Non-blocking Extra Debug Information in metadata]
-            _litellm_params = self.model_call_details.get("litellm_params", {})
-            _metadata = _litellm_params.get("metadata", {}) or {}
-            if (
-                litellm.turn_off_message_logging is not None
-                and litellm.turn_off_message_logging is True
-            ):
+        # log raw request to provider (like LangFuse) -- if opted in.
+        if litellm.log_raw_request_response is True:
+            try:
+                # [Non-blocking Extra Debug Information in metadata]
+                _litellm_params = self.model_call_details.get("litellm_params", {})
+                _metadata = _litellm_params.get("metadata", {}) or {}
+                if (
+                    litellm.turn_off_message_logging is not None
+                    and litellm.turn_off_message_logging is True
+                ):
+                    _metadata["raw_request"] = (
+                        "redacted by litellm. \
+                    'litellm.turn_off_message_logging=True'"
+                    )
+                else:
+                    _metadata["raw_request"] = str(curl_command)
+            except Exception as e:
                 _metadata["raw_request"] = (
-                    "redacted by litellm. \
-                'litellm.turn_off_message_logging=True'"
+                    "Unable to Log \
+                raw request: {}".format(
+                        str(e)
+                    )
                 )
-            else:
-                _metadata["raw_request"] = str(curl_command)
-        except Exception as e:
-            _metadata["raw_request"] = (
-                "Unable to Log \
-                raw request: {}".format(
-                    str(e)
-                )
-            )
         if self.logger_fn and callable(self.logger_fn):
             try:
                 self.logger_fn(
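The hunk above gates raw-request logging behind a new module-level flag. A hedged usage sketch — assuming `litellm.log_raw_request_response` and `litellm.turn_off_message_logging` are plain booleans read exactly as in the code above:

```python
import litellm

# Opt in to storing the provider-bound curl command in metadata["raw_request"].
litellm.log_raw_request_response = True

# If message logging is turned off, the raw request gets redacted instead of stored.
litellm.turn_off_message_logging = False

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello who are you"}],
)
```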
@@ -1621,7 +1624,6 @@ class Logging:
                     end_time=end_time,
                 )
             except Exception as e:
-
                 complete_streaming_response = None
         else:
             self.sync_streaming_chunks.append(result)

@@ -2391,7 +2393,6 @@ class Logging:
                     "async_complete_streaming_response"
                     in self.model_call_details
                 ):
-
                     await customLogger.async_log_event(
                         kwargs=self.model_call_details,
                         response_obj=self.model_call_details[

@@ -2730,7 +2731,7 @@ class Logging:
         only redacts when litellm.turn_off_message_logging == True
         """
         # check if user opted out of logging message/response to callbacks
-        if litellm.turn_off_message_logging == True:
+        if litellm.turn_off_message_logging is True:
             # remove messages, prompts, input, response from logging
             self.model_call_details["messages"] = [
                 {"role": "user", "content": "redacted-by-litellm"}

@@ -3250,7 +3251,7 @@ def client(original_function):
                         stream=kwargs.get("stream", False),
                     )
 
-                    if kwargs.get("stream", False) == True:
+                    if kwargs.get("stream", False) is True:
                         cached_result = CustomStreamWrapper(
                             completion_stream=cached_result,
                             model=model,

@@ -4030,7 +4031,10 @@ def openai_token_counter(
     """
     print_verbose(f"LiteLLM: Utils - Counting tokens for OpenAI model={model}")
     try:
-        encoding = tiktoken.encoding_for_model(model)
+        if "gpt-4o" in model:
+            encoding = tiktoken.get_encoding("o200k_base")
+        else:
+            encoding = tiktoken.encoding_for_model(model)
     except KeyError:
         print_verbose("Warning: model not found. Using cl100k_base encoding.")
         encoding = tiktoken.get_encoding("cl100k_base")
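The change above routes `gpt-4o` to tiktoken's `o200k_base` encoding (hence the `tiktoken>=0.7.0` bump later in this diff). A quick sketch of the observable difference; the sample text is arbitrary:

```python
import tiktoken
from litellm import token_counter

text = "Just some long text, and you know it will be longer than 7 tokens."

# gpt-4o uses the o200k_base vocabulary; older chat models use cl100k_base.
o200k = tiktoken.get_encoding("o200k_base")
cl100k = tiktoken.get_encoding("cl100k_base")
print(len(o200k.encode(text)), len(cl100k.encode(text)))

# token_counter now picks the right encoding from the model name.
print(token_counter(model="gpt-4o", messages=[{"role": "user", "content": text}]))
```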
@@ -4894,6 +4898,18 @@ def get_optional_params_embeddings(
         )
         final_params = {**optional_params, **kwargs}
         return final_params
+    if custom_llm_provider == "vertex_ai":
+        supported_params = get_supported_openai_params(
+            model=model,
+            custom_llm_provider="vertex_ai",
+            request_type="embeddings",
+        )
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = litellm.VertexAITextEmbeddingConfig().map_openai_params(
+            non_default_params=non_default_params, optional_params={}
+        )
+        final_params = {**optional_params, **kwargs}
+        return final_params
     if custom_llm_provider == "vertex_ai":
         if len(non_default_params.keys()) > 0:
             if litellm.drop_params is True:  # drop the unsupported non-default values

@@ -4927,7 +4943,18 @@ def get_optional_params_embeddings(
                 message=f"Setting user/encoding format is not supported by {custom_llm_provider}. To drop it from the call, set `litellm.drop_params = True`.",
             )
         return {**non_default_params, **kwargs}
+    if custom_llm_provider == "mistral":
+        supported_params = get_supported_openai_params(
+            model=model,
+            custom_llm_provider="mistral",
+            request_type="embeddings",
+        )
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = litellm.MistralEmbeddingConfig().map_openai_params(
+            non_default_params=non_default_params, optional_params={}
+        )
+        final_params = {**optional_params, **kwargs}
+        return final_params
     if (
         custom_llm_provider != "openai"
         and custom_llm_provider != "azure"
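With the two blocks above, embedding-specific OpenAI params for `vertex_ai` and `mistral` are now mapped through the provider configs instead of falling through unchanged. A hedged call sketch (the proxy config later in this diff registers `mistral/mistral-embed` the same way):

```python
import litellm

# mistral is OpenAI-compatible; embedding params now go through
# MistralEmbeddingConfig().map_openai_params rather than being passed raw.
response = litellm.embedding(
    model="mistral/mistral-embed",
    input=["hello world"],
)
print(len(response.data[0]["embedding"]))
```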
@@ -6166,13 +6193,16 @@ def get_api_base(
     if litellm.model_alias_map and model in litellm.model_alias_map:
         model = litellm.model_alias_map[model]
     try:
-        model, custom_llm_provider, dynamic_api_key, dynamic_api_base = (
-            get_llm_provider(
-                model=model,
-                custom_llm_provider=_optional_params.custom_llm_provider,
-                api_base=_optional_params.api_base,
-                api_key=_optional_params.api_key,
-            )
+        (
+            model,
+            custom_llm_provider,
+            dynamic_api_key,
+            dynamic_api_base,
+        ) = get_llm_provider(
+            model=model,
+            custom_llm_provider=_optional_params.custom_llm_provider,
+            api_base=_optional_params.api_base,
+            api_key=_optional_params.api_key,
         )
     except Exception as e:
         verbose_logger.debug("Error occurred in getting api base - {}".format(str(e)))

@@ -6220,7 +6250,7 @@ def get_first_chars_messages(kwargs: dict) -> str:
 
 def get_supported_openai_params(
     model: str,
-    custom_llm_provider: str,
+    custom_llm_provider: Optional[str] = None,
     request_type: Literal["chat_completion", "embeddings"] = "chat_completion",
 ) -> Optional[list]:
     """

@@ -6235,6 +6265,11 @@ def get_supported_openai_params(
     - List if custom_llm_provider is mapped
     - None if unmapped
     """
+    if not custom_llm_provider:
+        try:
+            custom_llm_provider = litellm.get_llm_provider(model=model)[1]
+        except BadRequestError:
+            return None
     if custom_llm_provider == "bedrock":
         return litellm.AmazonConverseConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "ollama":
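With `custom_llm_provider` now optional, the helper infers the provider from the model name and returns `None` (rather than raising) for unmapped models — which is exactly what the new unit test earlier in this diff asserts. Sketch:

```python
from litellm.utils import get_supported_openai_params

# Provider inferred from the model name.
print(get_supported_openai_params("gpt-4"))

# Explicit provider + request type, e.g. embedding params for mistral.
print(
    get_supported_openai_params(
        model="mistral-embed", custom_llm_provider="mistral", request_type="embeddings"
    )
)

# Unknown model -> None rather than an exception.
print(get_supported_openai_params("nonexistent"))
```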
@@ -6328,7 +6363,10 @@ def get_supported_openai_params(
             "max_retries",
         ]
     elif custom_llm_provider == "mistral":
-        return litellm.MistralConfig().get_supported_openai_params()
+        if request_type == "chat_completion":
+            return litellm.MistralConfig().get_supported_openai_params()
+        elif request_type == "embeddings":
+            return litellm.MistralEmbeddingConfig().get_supported_openai_params()
     elif custom_llm_provider == "replicate":
         return [
             "stream",

@@ -6370,7 +6408,10 @@ def get_supported_openai_params(
     elif custom_llm_provider == "palm" or custom_llm_provider == "gemini":
         return ["temperature", "top_p", "stream", "n", "stop", "max_tokens"]
     elif custom_llm_provider == "vertex_ai":
-        return litellm.VertexAIConfig().get_supported_openai_params()
+        if request_type == "chat_completion":
+            return litellm.VertexAIConfig().get_supported_openai_params()
+        elif request_type == "embeddings":
+            return litellm.VertexAITextEmbeddingConfig().get_supported_openai_params()
     elif custom_llm_provider == "sagemaker":
         return ["stream", "temperature", "max_tokens", "top_p", "stop", "n"]
     elif custom_llm_provider == "aleph_alpha":

@@ -6577,6 +6618,9 @@ def get_llm_provider(
                 or get_secret("FIREWORKSAI_API_KEY")
                 or get_secret("FIREWORKS_AI_TOKEN")
             )
+        elif custom_llm_provider == "azure_ai":
+            api_base = api_base or get_secret("AZURE_AI_API_BASE")  # type: ignore
+            dynamic_api_key = api_key or get_secret("AZURE_AI_API_KEY")
         elif custom_llm_provider == "mistral":
             # mistral is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.mistral.ai
             api_base = (

@@ -7458,7 +7502,6 @@ def validate_environment(model: Optional[str] = None) -> dict:
 
 
 def set_callbacks(callback_list, function_id=None):
-
     global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger
 
     try:

@@ -8767,6 +8810,13 @@ def exception_type(
                         response=original_exception.response,
                         litellm_debug_info=extra_information,
                     )
+                if "Request failed during generation" in error_str:
+                    # this is an internal server error from predibase
+                    raise litellm.InternalServerError(
+                        message=f"PredibaseException - {error_str}",
+                        llm_provider="predibase",
+                        model=model,
+                    )
                 elif hasattr(original_exception, "status_code"):
                     if original_exception.status_code == 500:
                         exception_mapping_worked = True

@@ -9085,7 +9135,7 @@ def exception_type(
                 ):
                     exception_mapping_worked = True
                     raise RateLimitError(
-                        message=f"VertexAIException RateLimitError - {error_str}",
+                        message=f"litellm.RateLimitError: VertexAIException - {error_str}",
                         model=model,
                         llm_provider="vertex_ai",
                         litellm_debug_info=extra_information,

@@ -9097,7 +9147,14 @@ def exception_type(
                         ),
                     ),
                 )
+            elif "500 Internal Server Error" in error_str:
+                exception_mapping_worked = True
+                raise ServiceUnavailableError(
+                    message=f"litellm.ServiceUnavailableError: VertexAIException - {error_str}",
+                    model=model,
+                    llm_provider="vertex_ai",
+                    litellm_debug_info=extra_information,
+                )
             if hasattr(original_exception, "status_code"):
                 if original_exception.status_code == 400:
                     exception_mapping_worked = True

@@ -10048,6 +10105,14 @@ def get_secret(
                 return oidc_token
             else:
                 raise ValueError("Github OIDC provider failed")
+        elif oidc_provider == "azure":
+            # https://azure.github.io/azure-workload-identity/docs/quick-start.html
+            azure_federated_token_file = os.getenv("AZURE_FEDERATED_TOKEN_FILE")
+            if azure_federated_token_file is None:
+                raise ValueError("AZURE_FEDERATED_TOKEN_FILE not found in environment")
+            with open(azure_federated_token_file, "r") as f:
+                oidc_token = f.read()
+            return oidc_token
         else:
             raise ValueError("Unsupported OIDC provider")
 
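The new `azure` branch above reads the workload-identity token file that Azure mounts into the pod. A minimal standalone sketch of the same lookup, assuming only the environment variable shown in the diff:

```python
import os


def read_azure_workload_identity_token() -> str:
    # AZURE_FEDERATED_TOKEN_FILE is injected by Azure Workload Identity.
    token_file = os.getenv("AZURE_FEDERATED_TOKEN_FILE")
    if token_file is None:
        raise ValueError("AZURE_FEDERATED_TOKEN_FILE not found in environment")
    with open(token_file, "r") as f:
        return f.read()
```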
@@ -11311,7 +11376,6 @@ class CustomStreamWrapper:
                     raise StopIteration
                 response_obj: GenericStreamingChunk = chunk
                 completion_obj["content"] = response_obj["text"]
-
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
 

@@ -11326,6 +11390,10 @@ class CustomStreamWrapper:
                         completion_tokens=response_obj["usage"]["outputTokens"],
                         total_tokens=response_obj["usage"]["totalTokens"],
                     )
+
+                if "tool_use" in response_obj and response_obj["tool_use"] is not None:
+                    completion_obj["tool_calls"] = [response_obj["tool_use"]]
+
             elif self.custom_llm_provider == "sagemaker":
                 print_verbose(f"ENTERS SAGEMAKER STREAMING for chunk {chunk}")
                 response_obj = self.handle_sagemaker_stream(chunk)

@@ -11342,7 +11410,6 @@ class CustomStreamWrapper:
                     new_chunk = self.completion_stream[:chunk_size]
                     completion_obj["content"] = new_chunk
                     self.completion_stream = self.completion_stream[chunk_size:]
-                    time.sleep(0.05)
             elif self.custom_llm_provider == "palm":
                 # fake streaming
                 response_obj = {}

@@ -11355,7 +11422,6 @@ class CustomStreamWrapper:
                     new_chunk = self.completion_stream[:chunk_size]
                     completion_obj["content"] = new_chunk
                     self.completion_stream = self.completion_stream[chunk_size:]
-                    time.sleep(0.05)
             elif self.custom_llm_provider == "ollama":
                 response_obj = self.handle_ollama_stream(chunk)
                 completion_obj["content"] = response_obj["text"]

@@ -11442,7 +11508,7 @@ class CustomStreamWrapper:
                 # for azure, we need to pass the model from the orignal chunk
                 self.model = chunk.model
                 response_obj = self.handle_openai_chat_completion_chunk(chunk)
-                if response_obj == None:
+                if response_obj is None:
                     return
                 completion_obj["content"] = response_obj["text"]
                 print_verbose(f"completion obj content: {completion_obj['content']}")

@@ -11575,7 +11641,7 @@ class CustomStreamWrapper:
                 else:
                     if (
                         self.stream_options is not None
-                        and self.stream_options["include_usage"] == True
+                        and self.stream_options["include_usage"] is True
                     ):
                         return model_response
                     return

@@ -11600,8 +11666,14 @@ class CustomStreamWrapper:
                 return model_response
             elif (
                 "content" in completion_obj
-                and isinstance(completion_obj["content"], str)
-                and len(completion_obj["content"]) > 0
+                and (
+                    isinstance(completion_obj["content"], str)
+                    and len(completion_obj["content"]) > 0
+                )
+                or (
+                    "tool_calls" in completion_obj
+                    and len(completion_obj["tool_calls"]) > 0
+                )
             ):  # cannot set content of an OpenAI Object to be an empty string
                 hold, model_response_str = self.check_special_tokens(
                     chunk=completion_obj["content"],

@@ -11657,7 +11729,7 @@ class CustomStreamWrapper:
                 else:
                     ## else
                     completion_obj["content"] = model_response_str
-                    if self.sent_first_chunk == False:
+                    if self.sent_first_chunk is False:
                         completion_obj["role"] = "assistant"
                         self.sent_first_chunk = True
                     model_response.choices[0].delta = Delta(**completion_obj)

@@ -11666,7 +11738,7 @@ class CustomStreamWrapper:
                 else:
                     return
             elif self.received_finish_reason is not None:
-                if self.sent_last_chunk == True:
+                if self.sent_last_chunk is True:
                     raise StopIteration
                 # flush any remaining holding chunk
                 if len(self.holding_chunk) > 0:
@@ -1387,6 +1387,26 @@
         "mode": "image_generation",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
     },
+    "text-embedding-004": {
+        "max_tokens": 3072,
+        "max_input_tokens": 3072,
+        "output_vector_size": 768,
+        "input_cost_per_token": 0.00000000625,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
+    },
+    "text-multilingual-embedding-002": {
+        "max_tokens": 2048,
+        "max_input_tokens": 2048,
+        "output_vector_size": 768,
+        "input_cost_per_token": 0.00000000625,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
+    },
    "textembedding-gecko": {
         "max_tokens": 3072,
         "max_input_tokens": 3072,
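A quick cost sanity check against the two pricing entries added above (input cost only, since `output_cost_per_token` is 0):

```python
# Pricing from the entries above: $0.00000000625 per input token, $0 per output token.
input_cost_per_token = 0.00000000625
tokens = 1_000_000
print(tokens * input_cost_per_token)  # roughly $0.00625 per 1M input tokens
```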
50  poetry.lock (generated)
@@ -1545,6 +1545,53 @@ files = [
     {file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"},
 ]
 
+[[package]]
+name = "mypy"
+version = "1.10.0"
+description = "Optional static typing for Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "mypy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2"},
+    {file = "mypy-1.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99"},
+    {file = "mypy-1.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e36fb078cce9904c7989b9693e41cb9711e0600139ce3970c6ef814b6ebc2b2"},
+    {file = "mypy-1.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2b0695d605ddcd3eb2f736cd8b4e388288c21e7de85001e9f85df9187f2b50f9"},
+    {file = "mypy-1.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:cd777b780312ddb135bceb9bc8722a73ec95e042f911cc279e2ec3c667076051"},
+    {file = "mypy-1.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3be66771aa5c97602f382230165b856c231d1277c511c9a8dd058be4784472e1"},
+    {file = "mypy-1.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8b2cbaca148d0754a54d44121b5825ae71868c7592a53b7292eeb0f3fdae95ee"},
+    {file = "mypy-1.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ec404a7cbe9fc0e92cb0e67f55ce0c025014e26d33e54d9e506a0f2d07fe5de"},
+    {file = "mypy-1.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e22e1527dc3d4aa94311d246b59e47f6455b8729f4968765ac1eacf9a4760bc7"},
+    {file = "mypy-1.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:a87dbfa85971e8d59c9cc1fcf534efe664d8949e4c0b6b44e8ca548e746a8d53"},
+    {file = "mypy-1.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a781f6ad4bab20eef8b65174a57e5203f4be627b46291f4589879bf4e257b97b"},
+    {file = "mypy-1.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b808e12113505b97d9023b0b5e0c0705a90571c6feefc6f215c1df9381256e30"},
+    {file = "mypy-1.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f55583b12156c399dce2df7d16f8a5095291354f1e839c252ec6c0611e86e2e"},
+    {file = "mypy-1.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4cf18f9d0efa1b16478c4c129eabec36148032575391095f73cae2e722fcf9d5"},
+    {file = "mypy-1.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:bc6ac273b23c6b82da3bb25f4136c4fd42665f17f2cd850771cb600bdd2ebeda"},
+    {file = "mypy-1.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9fd50226364cd2737351c79807775136b0abe084433b55b2e29181a4c3c878c0"},
+    {file = "mypy-1.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f90cff89eea89273727d8783fef5d4a934be2fdca11b47def50cf5d311aff727"},
+    {file = "mypy-1.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcfc70599efde5c67862a07a1aaf50e55bce629ace26bb19dc17cece5dd31ca4"},
+    {file = "mypy-1.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:075cbf81f3e134eadaf247de187bd604748171d6b79736fa9b6c9685b4083061"},
+    {file = "mypy-1.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:3f298531bca95ff615b6e9f2fc0333aae27fa48052903a0ac90215021cdcfa4f"},
+    {file = "mypy-1.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fa7ef5244615a2523b56c034becde4e9e3f9b034854c93639adb667ec9ec2976"},
+    {file = "mypy-1.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3236a4c8f535a0631f85f5fcdffba71c7feeef76a6002fcba7c1a8e57c8be1ec"},
+    {file = "mypy-1.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a2b5cdbb5dd35aa08ea9114436e0d79aceb2f38e32c21684dcf8e24e1e92821"},
+    {file = "mypy-1.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92f93b21c0fe73dc00abf91022234c79d793318b8a96faac147cd579c1671746"},
+    {file = "mypy-1.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:28d0e038361b45f099cc086d9dd99c15ff14d0188f44ac883010e172ce86c38a"},
+    {file = "mypy-1.10.0-py3-none-any.whl", hash = "sha256:f8c083976eb530019175aabadb60921e73b4f45736760826aa1689dda8208aee"},
+    {file = "mypy-1.10.0.tar.gz", hash = "sha256:3d087fcbec056c4ee34974da493a826ce316947485cef3901f511848e687c131"},
+]
+
+[package.dependencies]
+mypy-extensions = ">=1.0.0"
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = ">=4.1.0"
+
+[package.extras]
+dmypy = ["psutil (>=4.0)"]
+install-types = ["pip"]
+mypyc = ["setuptools (>=50)"]
+reports = ["lxml"]
+
 [[package]]
 name = "mypy-extensions"
 version = "1.0.0"

@@ -2127,6 +2174,7 @@ files = [
     {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
     {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
     {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
     {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},

@@ -3150,4 +3198,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0, !=3.9.7"
-content-hash = "6a37992b63b11d254f5f40687bd96898b1d9515728f663f30dcc81c4ef8df7b7"
+content-hash = "73054c657782120d170dc168ef07b494a916f1f810ff9c2b0ac878bd857a9dac"
@@ -85,6 +85,9 @@ model_list:
     litellm_params:
       model: openai/*
       api_key: os.environ/OPENAI_API_KEY
+  - model_name: mistral-embed
+    litellm_params:
+      model: mistral/mistral-embed
   - model_name: gpt-instruct # [PROD TEST] - tests if `/health` automatically infers this to be a text completion model
     litellm_params:
       model: text-completion-openai/gpt-3.5-turbo-instruct
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.40.8"
+version = "1.40.9"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"

@@ -19,7 +19,7 @@ documentation = "https://docs.litellm.ai"
 python = ">=3.8.1,<4.0, !=3.9.7"
 openai = ">=1.27.0"
 python-dotenv = ">=0.2.0"
-tiktoken = ">=0.4.0"
+tiktoken = ">=0.7.0"
 importlib-metadata = ">=6.8.0"
 tokenizers = "*"
 click = "*"

@@ -76,6 +76,7 @@ litellm = 'litellm:run_server'
 [tool.poetry.group.dev.dependencies]
 flake8 = "^6.1.0"
 black = "^23.12.0"
+mypy = "^1.0"
 pytest = "^7.4.3"
 pytest-mock = "^3.12.0"
 

@@ -84,7 +85,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.40.8"
+version = "1.40.9"
 version_files = [
     "pyproject.toml:^version"
 ]
@@ -34,7 +34,7 @@ opentelemetry-exporter-otlp==1.25.0
 
 ### LITELLM PACKAGE DEPENDENCIES
 python-dotenv==1.0.0 # for env
-tiktoken==0.6.0 # for calculating usage
+tiktoken==0.7.0 # for calculating usage
 importlib-metadata==6.8.0 # for random utils
 tokenizers==0.14.0 # for calculating usage
 click==8.1.7 # for proxy cli
@@ -91,7 +91,7 @@ model LiteLLM_TeamTable {
   updated_at DateTime @default(now()) @updatedAt @map("updated_at")
   model_spend Json @default("{}")
   model_max_budget Json @default("{}")
-  model_id Int? @unique
+  model_id Int? @unique // id for LiteLLM_ModelTable -> stores team-level model aliases
   litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
   litellm_model_table LiteLLM_ModelTable? @relation(fields: [model_id], references: [id])
 }
@@ -664,7 +664,7 @@ async def test_key_crossing_budget():
             response = await chat_completion(session=session, key=key)
             pytest.fail("Should have failed - Key crossed it's budget")
         except Exception as e:
-            assert "ExceededTokenBudget: Current spend for token:" in str(e)
+            assert "Budget has been exceeded!" in str(e)
 
 
 @pytest.mark.skip(reason="AWS Suspended Account")
@@ -22,6 +22,7 @@ async def generate_key(
         "text-embedding-ada-002",
         "dall-e-2",
         "fake-openai-endpoint-2",
+        "mistral-embed",
     ],
 ):
     url = "http://0.0.0.0:4000/key/generate"

@@ -197,14 +198,14 @@ async def completion(session, key):
     return response
 
 
-async def embeddings(session, key):
+async def embeddings(session, key, model="text-embedding-ada-002"):
     url = "http://0.0.0.0:4000/embeddings"
     headers = {
         "Authorization": f"Bearer {key}",
         "Content-Type": "application/json",
     }
     data = {
-        "model": "text-embedding-ada-002",
+        "model": model,
         "input": ["hello world"],
     }
 

@@ -408,6 +409,9 @@ async def test_embeddings():
         key_2 = key_gen["key"]
         await embeddings(session=session, key=key_2)
 
+        # embedding request with non OpenAI model
+        await embeddings(session=session, key=key, model="mistral-embed")
+
 
 @pytest.mark.asyncio
 async def test_image_generation():
@@ -49,7 +49,7 @@ async def new_user(
 
 
 async def add_member(
-    session, i, team_id, user_id=None, user_email=None, max_budget=None
+    session, i, team_id, user_id=None, user_email=None, max_budget=None, members=None
 ):
     url = "http://0.0.0.0:4000/team/member_add"
     headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}

@@ -58,10 +58,13 @@ async def add_member(
         data["member"]["user_email"] = user_email
     elif user_id is not None:
         data["member"]["user_id"] = user_id
+    elif members is not None:
+        data["member"] = members
 
     if max_budget is not None:
         data["max_budget_in_team"] = max_budget
 
+    print("sent data: {}".format(data))
     async with session.post(url, headers=headers, json=data) as response:
         status = response.status
         response_text = await response.text()

@@ -339,7 +342,7 @@ async def test_team_info():
 async def test_team_update_sc_2():
     """
     - Create team
-    - Add 1 user (doesn't exist in db)
+    - Add 3 users (doesn't exist in db)
     - Change team alias
     - Check if it works
     - Assert team object unchanged besides team alias

@@ -353,15 +356,20 @@ async def test_team_update_sc_2():
             {"role": "admin", "user_id": admin_user},
         ]
         team_data = await new_team(session=session, i=0, member_list=member_list)
-        ## Create new normal user
-        new_normal_user = f"krrish_{uuid.uuid4()}@berri.ai"
+        ## Create 10 normal users
+        members = [
+            {"role": "user", "user_id": f"krrish_{uuid.uuid4()}@berri.ai"}
+            for _ in range(10)
+        ]
         await add_member(
-            session=session,
-            i=0,
-            team_id=team_data["team_id"],
-            user_id=None,
-            user_email=new_normal_user,
+            session=session, i=0, team_id=team_data["team_id"], members=members
         )
+        ## ASSERT TEAM SIZE
+        team_info = await get_team_info(
+            session=session, get_team=team_data["team_id"], call_key="sk-1234"
+        )
+
+        assert len(team_info["team_info"]["members_with_roles"]) == 12
+
         ## CHANGE TEAM ALIAS
 

@@ -570,4 +578,4 @@ async def test_users_in_team_budget():
     except Exception as e:
         print("got exception, this is expected")
         print(e)
-        assert "Crossed spend within team" in str(e)
+        assert "Budget has been exceeded" in str(e)
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long

@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[45980,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-17b0c91edd3a24fe.js\",\"931\",\"static/chunks/app/page-d61796ff0d3a8faf.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"tghLG7_IS7i5OkQJRvCIl\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[45980,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-17b0c91edd3a24fe.js\",\"931\",\"static/chunks/app/page-bd882aee817406ff.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"48nWsJi-LJrUlOLzcK-Yz\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
||||||
2:I[77831,[],""]
|
2:I[77831,[],""]
|
||||||
3:I[45980,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-17b0c91edd3a24fe.js","931","static/chunks/app/page-d61796ff0d3a8faf.js"],""]
|
3:I[45980,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-17b0c91edd3a24fe.js","931","static/chunks/app/page-bd882aee817406ff.js"],""]
|
||||||
4:I[5613,[],""]
|
4:I[5613,[],""]
|
||||||
5:I[31778,[],""]
|
5:I[31778,[],""]
|
||||||
0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
1:null
|
1:null
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -2,6 +2,6 @@
|
||||||
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-17b0c91edd3a24fe.js","418","static/chunks/app/model_hub/page-4cb65c32467214b5.js"],""]
|
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-17b0c91edd3a24fe.js","418","static/chunks/app/model_hub/page-4cb65c32467214b5.js"],""]
|
||||||
4:I[5613,[],""]
|
4:I[5613,[],""]
|
||||||
5:I[31778,[],""]
|
5:I[31778,[],""]
|
||||||
0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
1:null
|
1:null
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -2,6 +2,6 @@
|
||||||
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-17b0c91edd3a24fe.js","461","static/chunks/app/onboarding/page-664c7288e11fff5a.js"],""]
|
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-17b0c91edd3a24fe.js","461","static/chunks/app/onboarding/page-664c7288e11fff5a.js"],""]
|
||||||
4:I[5613,[],""]
|
4:I[5613,[],""]
|
||||||
5:I[31778,[],""]
|
5:I[31778,[],""]
|
||||||
0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
1:null
|
1:null
|
||||||
|
|
|
@ -119,9 +119,24 @@ const ChatUI: React.FC<ChatUIProps> = ({
|
||||||
|
|
||||||
// Now, 'options' contains the list you wanted
|
// Now, 'options' contains the list you wanted
|
||||||
console.log(options); // You can log it to verify the list
|
console.log(options); // You can log it to verify the list
|
||||||
|
|
||||||
// setModelInfo(options) should be inside the if block to avoid setting it when no data is available
|
// if options.length > 0, only store unique values
|
||||||
setModelInfo(options);
|
if (options.length > 0) {
|
||||||
|
const uniqueModels = Array.from(new Set(options));
|
||||||
|
|
||||||
|
console.log("Unique models:", uniqueModels);
|
||||||
|
|
||||||
|
// sort uniqueModels alphabetically
|
||||||
|
uniqueModels.sort((a: any, b: any) => a.label.localeCompare(b.label));
|
||||||
|
|
||||||
|
|
||||||
|
console.log("Model info:", modelInfo);
|
||||||
|
|
||||||
|
// setModelInfo(options) should be inside the if block to avoid setting it when no data is available
|
||||||
|
setModelInfo(uniqueModels);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
setSelectedModel(fetchedAvailableModels.data[0].id);
|
setSelectedModel(fetchedAvailableModels.data[0].id);
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
|
|
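For readers skimming the ChatUI hunk above: the change deduplicates the fetched model options and sorts them alphabetically by label before storing them with `setModelInfo`, and only updates state when the options array is non-empty. Below is a minimal sketch of that pattern, assuming a simple `{ value, label }` option shape; the `ModelOption` type and `dedupeAndSortModels` helper are illustrative assumptions, not names from the actual ChatUI code.

```typescript
// Hypothetical option shape; the real ChatUI builds options from fetchedAvailableModels.
interface ModelOption {
  value: string;
  label: string;
}

// Keep the first option seen for each label, then sort alphabetically by label.
function dedupeAndSortModels(options: ModelOption[]): ModelOption[] {
  const seen = new Map<string, ModelOption>();
  for (const option of options) {
    if (!seen.has(option.label)) {
      seen.set(option.label, option);
    }
  }
  return Array.from(seen.values()).sort((a, b) => a.label.localeCompare(b.label));
}

// Usage mirroring the diff: only update component state when data is available.
// const uniqueModels = dedupeAndSortModels(options);
// if (uniqueModels.length > 0) {
//   setModelInfo(uniqueModels);
// }
```

One design note: this sketch keys the dedupe on `label` via a `Map`, whereas the diff uses `Array.from(new Set(options))`, which only removes reference-equal entries; in both cases the `localeCompare` sort keeps the model picker alphabetically ordered.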
@ -1130,7 +1130,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
||||||
setSelectedAPIKey(key);
|
setSelectedAPIKey(key);
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
✨ {key["key_alias"]} (Enterpise only Feature)
|
✨ {key["key_alias"]} (Enterprise only Feature)
|
||||||
</SelectItem>
|
</SelectItem>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -1165,7 +1165,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
|
||||||
setSelectedCustomer(user);
|
setSelectedCustomer(user);
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
✨ {user} (Enterpise only Feature)
|
✨ {user} (Enterprise only Feature)
|
||||||
</SelectItem>
|
</SelectItem>
|
||||||
);
|
);
|
||||||
})
|
})
|
||||||
|
|
|
@ -114,7 +114,7 @@ const Navbar: React.FC<NavbarProps> = ({
|
||||||
textDecoration: "underline",
|
textDecoration: "underline",
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
Get enterpise license
|
Get enterprise license
|
||||||
</a>
|
</a>
|
||||||
</div>
|
</div>
|
||||||
) : null}
|
) : null}
|
||||||
|
|
|
@ -832,7 +832,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
|
||||||
// @ts-ignore
|
// @ts-ignore
|
||||||
disabled={true}
|
disabled={true}
|
||||||
>
|
>
|
||||||
✨ {tag} (Enterpise only Feature)
|
✨ {tag} (Enterprise only Feature)
|
||||||
</SelectItem>
|
</SelectItem>
|
||||||
);
|
);
|
||||||
})}
|
})}
|
||||||
|
|