forked from phoenix/litellm-mirror
Merge branch 'main' into feat/friendliai
This commit is contained in: commit 776c75c1e5
99 changed files with 202794 additions and 632 deletions
@ -202,6 +202,7 @@ jobs:
-e REDIS_PORT=$REDIS_PORT \
-e AZURE_FRANCE_API_KEY=$AZURE_FRANCE_API_KEY \
-e AZURE_EUROPE_API_KEY=$AZURE_EUROPE_API_KEY \
-e MISTRAL_API_KEY=$MISTRAL_API_KEY \
-e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-e AWS_REGION_NAME=$AWS_REGION_NAME \
@ -150,7 +150,7 @@ $ litellm --config /path/to/config.yaml
```bash
curl "http://0.0.0.0:4000/v1/assistants?order=desc&limit=20" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-H "Authorization: Bearer sk-1234"
```

**Create a Thread**

@ -162,6 +162,14 @@ curl http://0.0.0.0:4000/v1/threads \
-d ''
```

**Get a Thread**

```bash
curl http://0.0.0.0:4000/v1/threads/{thread_id} \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234"
```

**Add Messages to the Thread**

```bash
@ -212,6 +212,94 @@ If you run the code two times, response1 will use the cache from the first run t

</TabItem>

</Tabs>

## Switch Cache On / Off Per LiteLLM Call

LiteLLM supports 4 cache-controls:

- `no-cache`: *Optional(bool)* When `True`, will not return a cached response, but instead call the actual endpoint.
- `no-store`: *Optional(bool)* When `True`, will not cache the response.
- `ttl`: *Optional(int)* Will cache the response for the user-defined amount of time (in seconds).
- `s-maxage`: *Optional(int)* Will only accept cached responses that are within the user-defined range (in seconds).

[Let us know if you need more](https://github.com/BerriAI/litellm/issues/1218)

<Tabs>
<TabItem value="no-cache" label="No-Cache">

Example usage `no-cache` - When `True`, will not return a cached response

```python
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "hello who are you"
        }
    ],
    cache={"no-cache": True},
)
```

</TabItem>

<TabItem value="no-store" label="No-Store">

Example usage `no-store` - When `True`, will not cache the response.

```python
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "hello who are you"
        }
    ],
    cache={"no-store": True},
)
```

</TabItem>

<TabItem value="ttl" label="ttl">
Example usage `ttl` - cache the response for 10 seconds

```python
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "hello who are you"
        }
    ],
    cache={"ttl": 10},
)
```

</TabItem>

<TabItem value="s-maxage" label="s-maxage">
Example usage `s-maxage` - will only accept cached responses for 60 seconds

```python
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "hello who are you"
        }
    ],
    cache={"s-maxage": 60},
)
```

</TabItem>

</Tabs>
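
These cache-controls can also be combined on a single call. A minimal sketch, assuming the `cache` dict accepts multiple controls at once (an assumption; only the individual controls above are documented):

```python
import litellm

# Cache this response for 10 minutes, but only accept cached
# responses that are at most 60 seconds old.
# NOTE: combining controls in one dict is an assumption based on the
# dict-style `cache` param shown in the tabs above.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello who are you"}],
    cache={"ttl": 600, "s-maxage": 60},
)
```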

## Cache Context Manager - Enable, Disable, Update Cache
46 docs/my-website/docs/observability/raw_request_response.md Normal file
@ -0,0 +1,46 @@
import Image from '@theme/IdealImage';

# Raw Request/Response Logging

See the raw request/response sent by LiteLLM in your logging provider (OTEL/Langfuse/etc.).

**on SDK**
```python
# pip install langfuse
import litellm
import os

# log raw request/response
litellm.log_raw_request_response = True

# from https://cloud.langfuse.com/
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
# Optional, defaults to https://cloud.langfuse.com
# os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com"

# LLM API Keys
os.environ['OPENAI_API_KEY'] = ""

# set langfuse as a callback, litellm will send the data to langfuse
litellm.success_callback = ["langfuse"]

# openai call
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "user", "content": "Hi 👋 - i'm openai"}
    ]
)
```

**on Proxy**

```yaml
litellm_settings:
  log_raw_request_response: True
```

**Expected Log**

<Image img={require('../../img/raw_request_log.png')}/>
@ -1,3 +1,5 @@
# llmcord.py

llmcord.py lets you and your friends chat with LLMs directly in your Discord server. It works with practically any LLM, remote or locally hosted.

GitHub: https://github.com/jakobdylanc/discord-llm-chatbot
@ -11,7 +11,7 @@ LiteLLM supports

:::info

Anthropic API fails requests when `max_tokens` are not passed. Due to this litellm passes `max_tokens=4096` when no `max_tokens` are passed
Anthropic API fails requests when `max_tokens` are not passed. Due to this litellm passes `max_tokens=4096` when no `max_tokens` are passed.

:::
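
If the `max_tokens=4096` default is not what you want, set `max_tokens` explicitly on the call. A minimal sketch (the model name is only an example):

```python
from litellm import completion

# Explicitly set max_tokens so litellm does not fall back to its
# max_tokens=4096 default for Anthropic models.
response = completion(
    model="anthropic/claude-3-opus-20240229",
    messages=[{"role": "user", "content": "hello"}],
    max_tokens=1024,
)
```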

@ -229,17 +229,6 @@ assert isinstance(

```

### Setting `anthropic-beta` Header in Requests

Pass the `extra_headers` param to litellm. All headers will be forwarded to the Anthropic API.

```python
response = completion(
    model="anthropic/claude-3-opus-20240229",
    messages=messages,
    tools=tools,
    extra_headers={"anthropic-beta": "<beta-feature-name>"},  # placeholder header value, shown for illustration
)
```

### Forcing Anthropic Tool Use
@ -3,53 +3,155 @@ import TabItem from '@theme/TabItem';

# Azure AI Studio

**Ensure the following:**
1. The API Base passed ends in the `/v1/` prefix
example:
```python
api_base = "https://Mistral-large-dfgfj-serverless.eastus2.inference.ai.azure.com/v1/"
```
LiteLLM supports all models on Azure AI Studio

2. The `model` passed is listed in [supported models](#supported-models). You **DO NOT** need to pass your deployment name to litellm. Example `model=azure/Mistral-large-nmefg`

## Usage

<Tabs>
<TabItem value="sdk" label="SDK">

### ENV VAR
```python
import litellm
response = litellm.completion(
    model="azure/command-r-plus",
    api_base="<your-deployment-base>/v1/",
    api_key="eskk******",
    messages=[{"role": "user", "content": "What is the meaning of life?"}],
import os
os.environ["AZURE_AI_API_KEY"] = ""
os.environ["AZURE_AI_API_BASE"] = ""
```

### Example Call

```python
from litellm import completion
import os
## set ENV variables
os.environ["AZURE_AI_API_KEY"] = "azure ai key"
os.environ["AZURE_AI_API_BASE"] = "azure ai base url" # e.g.: https://Mistral-large-dfgfj-serverless.eastus2.inference.ai.azure.com/

# azure ai command-r-plus call
response = completion(
    model="azure_ai/command-r-plus",
    messages = [{ "content": "Hello, how are you?","role": "user"}]
)
```

</TabItem>
<TabItem value="proxy" label="PROXY">

## Sample Usage - LiteLLM Proxy

1. Add models to your config.yaml

```yaml
model_list:
  - model_name: mistral
    litellm_params:
      model: azure/mistral-large-latest
      api_base: https://Mistral-large-dfgfj-serverless.eastus2.inference.ai.azure.com/v1/
      api_key: JGbKodRcTp****
  - model_name: command-r-plus
    litellm_params:
      model: azure/command-r-plus
      api_key: os.environ/AZURE_COHERE_API_KEY
      api_base: os.environ/AZURE_COHERE_API_BASE
      model: azure_ai/command-r-plus
      api_key: os.environ/AZURE_AI_API_KEY
      api_base: os.environ/AZURE_AI_API_BASE
```

2. Start the proxy

```bash
$ litellm --config /path/to/config.yaml --debug
```

3. Send Request to LiteLLM Proxy Server

<Tabs>

<TabItem value="openai" label="OpenAI Python v1.0.0+">

```python
import openai
client = openai.OpenAI(
    api_key="sk-1234",             # pass litellm proxy key, if you're using virtual keys
    base_url="http://0.0.0.0:4000" # litellm-proxy-base url
)

response = client.chat.completions.create(
    model="command-r-plus",
    messages = [
        {
            "role": "system",
            "content": "Be a good human!"
        },
        {
            "role": "user",
            "content": "What do you know about earth?"
        }
    ]
)

print(response)
```

</TabItem>

<TabItem value="curl" label="curl">

```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
    "model": "command-r-plus",
    "messages": [
        {
            "role": "system",
            "content": "Be a good human!"
        },
        {
            "role": "user",
            "content": "What do you know about earth?"
        }
    ]
}'
```
</TabItem>

</Tabs>

</TabItem>

</Tabs>

## Passing additional params - max_tokens, temperature
See all litellm.completion supported params [here](../completion/input.md#translated-openai-params)

```python
# !pip install litellm
from litellm import completion
import os
## set ENV variables
os.environ["AZURE_AI_API_KEY"] = "azure ai api key"
os.environ["AZURE_AI_API_BASE"] = "azure ai api base"

# command r plus call
response = completion(
    model="azure_ai/command-r-plus",
    messages = [{ "content": "Hello, how are you?","role": "user"}],
    max_tokens=20,
    temperature=0.5
)
```

**proxy**

```yaml
model_list:
  - model_name: command-r-plus
    litellm_params:
      model: azure_ai/command-r-plus
      api_key: os.environ/AZURE_AI_API_KEY
      api_base: os.environ/AZURE_AI_API_BASE
      max_tokens: 20
      temperature: 0.5
```

2. Start the proxy

```bash
@ -103,9 +205,6 @@ response = litellm.completion(

</Tabs>

</TabItem>
</Tabs>

## Function Calling

<Tabs>

@ -115,8 +214,8 @@ response = litellm.completion(
from litellm import completion

# set env
os.environ["AZURE_MISTRAL_API_KEY"] = "your-api-key"
os.environ["AZURE_MISTRAL_API_BASE"] = "your-api-base"
os.environ["AZURE_AI_API_KEY"] = "your-api-key"
os.environ["AZURE_AI_API_BASE"] = "your-api-base"

tools = [
    {

@ -141,9 +240,7 @@ tools = [
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]

response = completion(
    model="azure/mistral-large-latest",
    api_base=os.getenv("AZURE_MISTRAL_API_BASE"),
    api_key=os.getenv("AZURE_MISTRAL_API_KEY"),
    model="azure_ai/mistral-large-latest",
    messages=messages,
    tools=tools,
    tool_choice="auto",

@ -206,10 +303,12 @@ curl http://0.0.0.0:4000/v1/chat/completions \

## Supported Models

LiteLLM supports **ALL** azure ai models. Here are a few examples:

| Model Name | Function Call |
|------------|---------------|
| Cohere command-r-plus | `completion(model="azure/command-r-plus", messages)` |
| Cohere ommand-r | `completion(model="azure/command-r", messages)` |
| Cohere command-r | `completion(model="azure/command-r", messages)` |
| mistral-large-latest | `completion(model="azure/mistral-large-latest", messages)` |
@ -144,16 +144,135 @@ print(response)
</TabItem>
</Tabs>

## Set temperature, top p, etc.

<Tabs>
<TabItem value="sdk" label="SDK">

```python
import os
from litellm import completion

os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""

response = completion(
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    messages=[{ "content": "Hello, how are you?","role": "user"}],
    temperature=0.7,
    top_p=1
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">

**Set on yaml**

```yaml
model_list:
  - model_name: bedrock-claude-v1
    litellm_params:
      model: bedrock/anthropic.claude-instant-v1
      temperature: <your-temp>
      top_p: <your-top-p>
```

**Set on request**

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="bedrock-claude-v1",
    messages = [
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    temperature=0.7,
    top_p=1
)

print(response)
```

</TabItem>
</Tabs>

## Pass provider-specific params

If you pass a non-openai param to litellm, we'll assume it's provider-specific and send it as a kwarg in the request body. [See more](../completion/input.md#provider-specific-params)

<Tabs>
<TabItem value="sdk" label="SDK">

```python
import os
from litellm import completion

os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""

response = completion(
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    messages=[{ "content": "Hello, how are you?","role": "user"}],
    top_k=1 # 👈 PROVIDER-SPECIFIC PARAM
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">

**Set on yaml**

```yaml
model_list:
  - model_name: bedrock-claude-v1
    litellm_params:
      model: bedrock/anthropic.claude-instant-v1
      top_k: 1 # 👈 PROVIDER-SPECIFIC PARAM
```

**Set on request**

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="bedrock-claude-v1",
    messages = [
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    temperature=0.7,
    extra_body={
        "top_k": 1 # 👈 PROVIDER-SPECIFIC PARAM
    }
)

print(response)
```

</TabItem>
</Tabs>

## Usage - Function Calling

:::info

Claude returns its output as an XML Tree. [Here is how we translate it](https://github.com/BerriAI/litellm/blob/49642a5b00a53b1babc1a753426a8afcac85dbbe/litellm/llms/prompt_templates/factory.py#L734).

You can see the raw response via `response._hidden_params["original_response"]`.

Claude hallucinates, e.g. returning the list param `value` as `<value>\n<item>apple</item>\n<item>banana</item>\n</value>` or `<value>\n<list>\n<item>apple</item>\n<item>banana</item>\n</list>\n</value>`.
:::
LiteLLM uses Bedrock's Converse API for making tool calls

```python
from litellm import completion
@ -361,47 +480,6 @@ response = completion(
)
```

### Passing an external BedrockRuntime.Client as a parameter - Completion()
Pass an external BedrockRuntime.Client object as a parameter to litellm.completion. Useful when using an AWS credentials profile, SSO session, assumed role session, or if environment variables are not available for auth.

Create a client from session credentials:
```python
import boto3
from litellm import completion

bedrock = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-east-1",
    aws_access_key_id="",
    aws_secret_access_key="",
    aws_session_token="",
)

response = completion(
    model="bedrock/anthropic.claude-instant-v1",
    messages=[{ "content": "Hello, how are you?","role": "user"}],
    aws_bedrock_client=bedrock,
)
```

Create a client from AWS profile in `~/.aws/config`:
```python
import boto3
from litellm import completion

dev_session = boto3.Session(profile_name="dev-profile")
bedrock = dev_session.client(
    service_name="bedrock-runtime",
    region_name="us-east-1",
)

response = completion(
    model="bedrock/anthropic.claude-instant-v1",
    messages=[{ "content": "Hello, how are you?","role": "user"}],
    aws_bedrock_client=bedrock,
)
```

### SSO Login (AWS Profile)
- Set `AWS_PROFILE` environment variable
- Make bedrock completion call (see the sketch below)
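
A minimal sketch of this flow, assuming litellm resolves AWS credentials through boto3 and therefore honors the standard `AWS_PROFILE` variable (the profile name is a placeholder):

```python
import os
from litellm import completion

# After `aws sso login --profile dev-profile`, point boto3's
# credential resolution at that profile via the standard env var.
os.environ["AWS_PROFILE"] = "dev-profile"  # placeholder profile name

response = completion(
    model="bedrock/anthropic.claude-instant-v1",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
)
```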

@ -464,6 +542,56 @@ response = completion(
)
```

### Passing an external BedrockRuntime.Client as a parameter - Completion()

:::warning

This is a deprecated flow. Boto3 is not async, and boto3.client does not let us make the HTTP call through httpx. Pass in your AWS params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)

:::

Pass an external BedrockRuntime.Client object as a parameter to litellm.completion. Useful when using an AWS credentials profile, SSO session, assumed role session, or if environment variables are not available for auth.

Create a client from session credentials:
```python
import boto3
from litellm import completion

bedrock = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-east-1",
    aws_access_key_id="",
    aws_secret_access_key="",
    aws_session_token="",
)

response = completion(
    model="bedrock/anthropic.claude-instant-v1",
    messages=[{ "content": "Hello, how are you?","role": "user"}],
    aws_bedrock_client=bedrock,
)
```

Create a client from AWS profile in `~/.aws/config`:
```python
import boto3
from litellm import completion

dev_session = boto3.Session(profile_name="dev-profile")
bedrock = dev_session.client(
    service_name="bedrock-runtime",
    region_name="us-east-1",
)

response = completion(
    model="bedrock/anthropic.claude-instant-v1",
    messages=[{ "content": "Hello, how are you?","role": "user"}],
    aws_bedrock_client=bedrock,
)
```

## Provisioned throughput models
To use provisioned throughput Bedrock models pass
- `model=bedrock/<base-model>`, example `model=bedrock/anthropic.claude-v2`. Set `model` to any of the [Supported AWS models](#supported-aws-bedrock-models)
@ -1,10 +1,13 @@
# 🆕 Clarifai
# Clarifai
Anthropic, OpenAI, Mistral, Llama and Gemini LLMs are supported on Clarifai.

:::warning

Streaming is not yet supported when using Clarifai with LiteLLM. Tracking support here: https://github.com/BerriAI/litellm/issues/4162

:::

## Pre-Requisites

`pip install clarifai`

`pip install litellm`

## Required Environment Variables

@ -12,6 +15,7 @@ To obtain your Clarifai Personal access token follow this [link](https://docs.cl

```python
os.environ["CLARIFAI_API_KEY"] = "YOUR_CLARIFAI_PAT" # CLARIFAI_PAT
```

## Usage

@ -68,7 +72,7 @@ Example Usage - Note: liteLLM supports all models deployed on Clarifai
| clarifai/meta.Llama-2.codeLlama-70b-Python | `completion('clarifai/meta.Llama-2.codeLlama-70b-Python', messages)`|
| clarifai/meta.Llama-2.codeLlama-70b-Instruct | `completion('clarifai/meta.Llama-2.codeLlama-70b-Instruct', messages)` |

## Mistal LLMs
## Mistral LLMs
| Model Name | Function Call |
|---------------------------------------------|------------------------------------------------------------------------|
| clarifai/mistralai.completion.mixtral-8x22B | `completion('clarifai/mistralai.completion.mixtral-8x22B', messages)` |
@ -125,11 +125,12 @@ See all litellm.completion supported params [here](../completion/input.md#transl
from litellm import completion
import os
## set ENV variables
os.environ["PREDIBASE_API_KEY"] = "predibase key"
os.environ["DATABRICKS_API_KEY"] = "databricks key"
os.environ["DATABRICKS_API_BASE"] = "databricks api base"

# predibase llama-3 call
# databricks dbrx call
response = completion(
    model="predibase/llama3-8b-instruct",
    model="databricks/databricks-dbrx-instruct",
    messages = [{ "content": "Hello, how are you?","role": "user"}],
    max_tokens=20,
    temperature=0.5
@ -449,6 +449,54 @@ print(response)
</TabItem>
</Tabs>

## Usage - Function Calling

LiteLLM supports Function Calling for Vertex AI gemini models.

```python
from litellm import completion
import os
# set env
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ".."
os.environ["VERTEX_AI_PROJECT"] = ".."
os.environ["VERTEX_AI_LOCATION"] = ".."

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
]
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]

response = completion(
    model="vertex_ai/gemini-pro-vision",
    messages=messages,
    tools=tools,
)
# Add any assertions here, to check response args
print(response)
assert isinstance(response.choices[0].message.tool_calls[0].function.name, str)
assert isinstance(
    response.choices[0].message.tool_calls[0].function.arguments, str
)
```

## Chat Models
| Model Name | Function Call |
@ -500,6 +548,8 @@ All models listed [here](https://github.com/BerriAI/litellm/blob/57f37f743886a02

| Model Name | Function Call |
|------------|---------------|
| text-embedding-004 | `embedding(model="vertex_ai/text-embedding-004", input)` |
| text-multilingual-embedding-002 | `embedding(model="vertex_ai/text-multilingual-embedding-002", input)` |
| textembedding-gecko | `embedding(model="vertex_ai/textembedding-gecko", input)` |
| textembedding-gecko-multilingual | `embedding(model="vertex_ai/textembedding-gecko-multilingual", input)` |
| textembedding-gecko-multilingual@001 | `embedding(model="vertex_ai/textembedding-gecko-multilingual@001", input)` |

@ -508,6 +558,29 @@ All models listed [here](https://github.com/BerriAI/litellm/blob/57f37f743886a02
| text-embedding-preview-0409 | `embedding(model="vertex_ai/text-embedding-preview-0409", input)` |
| text-multilingual-embedding-preview-0409 | `embedding(model="vertex_ai/text-multilingual-embedding-preview-0409", input)` |

### Advanced: Use `task_type` and `title` (Vertex-Specific Params)

👉 `task_type` and `title` are vertex-specific params

LiteLLM Supported Vertex-Specific Params

```python
auto_truncate: Optional[bool] = None
task_type: Optional[Literal["RETRIEVAL_QUERY","RETRIEVAL_DOCUMENT", "SEMANTIC_SIMILARITY", "CLASSIFICATION", "CLUSTERING", "QUESTION_ANSWERING", "FACT_VERIFICATION"]] = None
title: Optional[str] = None # The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
```

**Example Usage with LiteLLM**
```python
response = litellm.embedding(
    model="vertex_ai/text-embedding-004",
    input=["good morning from litellm", "gm"],
    task_type="RETRIEVAL_DOCUMENT",
    dimensions=1,
    auto_truncate=True,
)
```

## Image Generation Models

Usage
@ -138,14 +138,22 @@ Navigate to the Usage Tab on the LiteLLM UI (found on https://your-proxy-endpoin
<Image img={require('../../img/admin_ui_spend.png')} />

## API Endpoints to get Spend
#### Getting Spend Reports - To Charge Other Teams, API Keys
#### Getting Spend Reports - To Charge Other Teams, Customers

Use the `/global/spend/report` endpoint to get daily spend per team, with a breakdown of spend per API Key, Model
Use the `/global/spend/report` endpoint to get daily spend report per
- team
- customer [this is `user` passed to `/chat/completions` request](#how-to-track-spend-with-litellm)

<Tabs>

<TabItem value="per team" label="Spend Per Team">

##### Example Request

👉 Key Change: Specify `group_by=team`

```shell
curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30' \
curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30&group_by=team' \
-H 'Authorization: Bearer sk-1234'
```
@ -254,6 +262,69 @@ Output from script
```

</TabItem>

</Tabs>

</TabItem>

<TabItem value="per customer" label="Spend Per Customer">

##### Example Request

👉 Key Change: Specify `group_by=customer`

```shell
curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30&group_by=customer' \
-H 'Authorization: Bearer sk-1234'
```

##### Example Response

```shell
[
    {
        "group_by_day": "2024-04-30T00:00:00+00:00",
        "customers": [
            {
                "customer": "palantir",
                "total_spend": 0.0015265,
                "metadata": [ # see the spend by unique(key + model)
                    {
                        "model": "gpt-4",
                        "spend": 0.00123,
                        "total_tokens": 28,
                        "api_key": "88dc28.." # the hashed api key
                    },
                    {
                        "model": "gpt-4",
                        "spend": 0.00123,
                        "total_tokens": 28,
                        "api_key": "a73dc2.." # the hashed api key
                    },
                    {
                        "model": "chatgpt-v-2",
                        "spend": 0.000214,
                        "total_tokens": 122,
                        "api_key": "898c28.." # the hashed api key
                    },
                    {
                        "model": "gpt-3.5-turbo",
                        "spend": 0.0000825,
                        "total_tokens": 85,
                        "api_key": "84dc28.." # the hashed api key
                    }
                ]
            }
        ]
    }
]
```

</TabItem>

</Tabs>
@ -356,4 +427,23 @@ model_list:

## Custom Input/Output Pricing

👉 Head to [Custom Input/Output Pricing](https://docs.litellm.ai/docs/proxy/custom_pricing) to set up custom pricing for your models

## ✨ Custom k,v pairs

Log specific key,value pairs as part of the metadata for a spend log

:::info

Logging specific key,value pairs in spend logs metadata is an enterprise feature. [See here](./enterprise.md#tracking-spend-with-custom-metadata)

:::

## ✨ Custom Tags

:::info

Tracking spend with Custom tags is an enterprise feature. [See here](./enterprise.md#tracking-spend-for-custom-tags)

:::
@ -42,6 +42,14 @@ Set `JSON_LOGS="True"` in your env:
```bash
export JSON_LOGS="True"
```
**OR**

Set `json_logs: true` in your yaml:

```yaml
litellm_settings:
  json_logs: true
```

Start proxy

@ -49,4 +57,35 @@ Start proxy
$ litellm
```

The proxy will now output all logs in JSON format.

## Control Log Output

Turn off FastAPI's default 'INFO' logs

1. Turn on 'json logs'
```yaml
litellm_settings:
  json_logs: true
```

2. Set `LITELLM_LOG` to 'ERROR'

Only get logs if an error occurs.

```bash
LITELLM_LOG="ERROR"
```

3. Start proxy

```bash
$ litellm
```

Expected Output:

```bash
# no info statements
```
@ -1,5 +1,6 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import Image from '@theme/IdealImage';

# 🐳 Docker, Deploying LiteLLM Proxy

@ -537,7 +538,9 @@ ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml

## Advanced Deployment Settings

### Customization of the server root path
### 1. Customization of the server root path (custom Proxy base url)

💥 Use this when you want to serve LiteLLM on a custom base url path like `https://localhost:4000/api/v1`

:::info

@ -548,9 +551,29 @@ In a Kubernetes deployment, it's possible to utilize a shared DNS to host multip
Customize the root path to eliminate the need for employing multiple DNS configurations during deployment.

👉 Set `SERVER_ROOT_PATH` in your .env and this will be set as your server root path
```
export SERVER_ROOT_PATH="/api/v1"
```

**Step 1. Run Proxy with `SERVER_ROOT_PATH` set in your env**

### Setting SSL Certification
```shell
docker run --name litellm-proxy \
-e DATABASE_URL=postgresql://<user>:<password>@<host>:<port>/<dbname> \
-e SERVER_ROOT_PATH="/api/v1" \
-p 4000:4000 \
ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml
```

After running the proxy you can access it on `http://0.0.0.0:4000/api/v1/` (since we set `SERVER_ROOT_PATH="/api/v1"`)

**Step 2. Verify Running on correct path**

<Image img={require('../../img/custom_root_path.png')} />

**That's it**, that's all you need to run the proxy on a custom root path

### 2. Setting SSL Certification

Use this if you need to set SSL certificates for your on-prem LiteLLM proxy
@ -205,6 +205,146 @@ curl -X GET "http://0.0.0.0:4000/spend/tags" \
```

## Tracking Spend with custom metadata

Requirements:

- Virtual Keys & a database should be set up, see [virtual keys](https://docs.litellm.ai/docs/proxy/virtual_keys)

#### Usage - /chat/completions requests with special spend logs metadata

<Tabs>

<TabItem value="openai" label="OpenAI Python v1.0.0+">

Set `extra_body={"metadata": { }}` to the `metadata` you want to pass

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages = [
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "metadata": {
            "spend_logs_metadata": {
                "hello": "world"
            }
        }
    }
)

print(response)
```
</TabItem>

<TabItem value="Curl" label="Curl Request">

Pass `metadata` as part of the request body

```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "gpt-3.5-turbo",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ],
    "metadata": {
        "spend_logs_metadata": {
            "hello": "world"
        }
    }
}'
```
</TabItem>
<TabItem value="langchain" label="Langchain">

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model = "gpt-3.5-turbo",
    temperature=0.1,
    extra_body={
        "metadata": {
            "spend_logs_metadata": {
                "hello": "world"
            }
        }
    }
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that im using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)

print(response)
```

</TabItem>
</Tabs>

#### Viewing Spend w/ custom metadata

#### `/spend/logs` Request Format

```bash
curl -X GET "http://0.0.0.0:4000/spend/logs?request_id=<your-call-id>" \ # e.g.: chatcmpl-9ZKMURhVYSi9D6r6PJ9vLcayIK0Vm
-H "Authorization: Bearer sk-1234"
```

#### `/spend/logs` Response Format
```bash
[
    {
        "request_id": "chatcmpl-9ZKMURhVYSi9D6r6PJ9vLcayIK0Vm",
        "call_type": "acompletion",
        "metadata": {
            "user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
            "user_api_key_alias": null,
            "spend_logs_metadata": { # 👈 LOGGED CUSTOM METADATA
                "hello": "world"
            },
            "user_api_key_team_id": null,
            "user_api_key_user_id": "116544810872468347480",
            "user_api_key_team_alias": null
        },
    }
]
```

## Enforce Required Params for LLM Requests
Use this when you want to enforce that all requests include certain params. Example: you need all requests to include the `user` and `["metadata"]["generation_name"]` params.
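
For instance, a compliant request from the OpenAI SDK would carry both enforced params. A minimal sketch (the key, user id, and generation name are placeholders):

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# Both enforced params are present: `user` at the top level, and
# `metadata.generation_name` passed through the request body.
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
    user="user-1234",  # placeholder end-user id
    extra_body={"metadata": {"generation_name": "my-gen"}},  # placeholder name
)
```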
@ -606,6 +606,52 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \

** 🎉 Expect to see this trace logged in your OTEL collector**

### Context propagation across Services `Traceparent HTTP Header`

❓ Use this when you want to **pass information about the incoming request in a distributed tracing system**

✅ Key change: Pass the **`traceparent` header** in your requests. [Read more about traceparent headers here](https://uptrace.dev/opentelemetry/opentelemetry-traceparent.html#what-is-traceparent-header)
```curl
traceparent: 00-80e1afed08e019fc1110464cfa66635c-7a085853722dc6d2-01
```
Example Usage
1. Make Request to LiteLLM Proxy with `traceparent` header
```python
import openai
import uuid

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
example_traceparent = "00-80e1afed08e019fc1110464cfa66635c-02e80198930058d4-01"
extra_headers = {
    "traceparent": example_traceparent
}
_trace_id = example_traceparent.split("-")[1]

print("EXTRA HEADERS: ", extra_headers)
print("Trace ID: ", _trace_id)

response = client.chat.completions.create(
    model="llama3",
    messages=[
        {"role": "user", "content": "this is a test request, write a short poem"}
    ],
    extra_headers=extra_headers,
)

print(response)
```

```shell
# EXTRA HEADERS: {'traceparent': '00-80e1afed08e019fc1110464cfa66635c-02e80198930058d4-01'}
# Trace ID: 80e1afed08e019fc1110464cfa66635c
```

2. Lookup Trace ID on OTEL Logger

Search for Trace=`80e1afed08e019fc1110464cfa66635c` on your OTEL Collector

<Image img={require('../../img/otel_parent.png')} />
@ -21,6 +21,7 @@ general_settings:

litellm_settings:
  set_verbose: False # Switch off Debug Logging, ensure your logs do not have any debugging on
  json_logs: true # Get debug logs in json format
```

Set slack webhook url in your env

@ -28,6 +29,11 @@ Set slack webhook url in your env
export SLACK_WEBHOOK_URL="https://hooks.slack.com/services/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"
```

Turn off FastAPI's default info logs
```bash
export LITELLM_LOG="ERROR"
```

:::info

Need help or want dedicated support? Talk to a founder [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
@ -2,18 +2,13 @@ import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# 🔥 Fallbacks, Retries, Timeouts, Load Balancing
# 🔥 Load Balancing, Fallbacks, Retries, Timeouts

Retry call with multiple instances of the same model.

If a call fails after num_retries, fall back to another model group.

If the error is a context window exceeded error, fall back to a larger model group (if given).

[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/router.py)
- Quick Start [load balancing](#test---load-balancing)
- Quick Start [client side fallbacks](#test---client-side-fallbacks)

## Quick Start - Load Balancing
### Step 1 - Set deployments on config
#### Step 1 - Set deployments on config

**Example config below**. Here requests with `model=gpt-3.5-turbo` will be routed across multiple instances of `azure/gpt-3.5-turbo`
```yaml

@ -38,50 +33,214 @@ model_list:
      rpm: 1440
```

### Step 2: Start Proxy with config
#### Step 2: Start Proxy with config

```shell
$ litellm --config /path/to/config.yaml
```

### Step 3: Use proxy - Call a model group [Load Balancing]
Curl Command
### Test - Load Balancing

Here requests with `model=gpt-3.5-turbo` will be routed across multiple instances of `azure/gpt-3.5-turbo`

👉 Key Change: `model="gpt-3.5-turbo"`

**Check the `model_id` in Response Headers to make sure the requests are being load balanced**
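
One way to see that header from Python: the OpenAI v1 SDK exposes raw response headers via `with_raw_response`. A minimal sketch (the `x-litellm-model-id` header name is an assumption about what the proxy returns):

```python
import openai

client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:4000")

# with_raw_response gives access to HTTP headers alongside the parsed body
raw = client.chat.completions.with_raw_response.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "which deployment served me?"}],
)

print(raw.headers.get("x-litellm-model-id"))  # assumed header name; varies per deployment hit
response = raw.parse()  # the usual ChatCompletion object
```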

<Tabs>

<TabItem value="openai" label="OpenAI Python v1.0.0+">

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages = [
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ]
)

print(response)
```
</TabItem>

<TabItem value="Curl" label="Curl Request">

Example curl request:

```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "gpt-3.5-turbo",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ]
}'
```
</TabItem>
<TabItem value="langchain" label="Langchain">

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
import os

os.environ["OPENAI_API_KEY"] = "anything"

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model="gpt-3.5-turbo",
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that im using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)

print(response)
```

### Usage - Call a specific model deployment
If you want to call a specific model defined in the `config.yaml`, you can call the `litellm_params: model`
</TabItem>

</Tabs>
### Test - Client Side Fallbacks
In this request the following will occur:
1. The request to `model="zephyr-beta"` will fail
2. litellm proxy will loop through all the model_groups specified in `fallbacks=["gpt-3.5-turbo"]`
3. The request to `model="gpt-3.5-turbo"` will succeed and the client making the request will get a response from gpt-3.5-turbo

👉 Key Change: `"fallbacks": ["gpt-3.5-turbo"]`

<Tabs>

<TabItem value="openai" label="OpenAI Python v1.0.0+">

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

response = client.chat.completions.create(
    model="zephyr-beta",
    messages = [
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "fallbacks": ["gpt-3.5-turbo"]
    }
)

print(response)
```
</TabItem>

<TabItem value="Curl" label="Curl Request">

Pass `fallbacks` as part of the request body

```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "zephyr-beta",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ],
    "fallbacks": ["gpt-3.5-turbo"]
}'
```
</TabItem>
<TabItem value="langchain" label="Langchain">

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
import os

os.environ["OPENAI_API_KEY"] = "anything"

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model="zephyr-beta",
    extra_body={
        "fallbacks": ["gpt-3.5-turbo"]
    }
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that im using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)

print(response)
```

</TabItem>

</Tabs>

<!--
### Test it!

In this example it will call `azure/gpt-turbo-small-ca`. Defined in the config on Step 1

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "azure/gpt-turbo-small-ca",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ]
}'
```
--header 'Content-Type: application/json' \
--data-raw '{
    "model": "zephyr-beta", # 👈 MODEL NAME to fallback from
    "messages": [
        {"role": "user", "content": "what color is red"}
    ],
    "mock_testing_fallbacks": true
}'
``` -->
## Fallbacks + Retries + Timeouts + Cooldowns
## Advanced
### Fallbacks + Retries + Timeouts + Cooldowns

**Set via config**
```yaml

@ -114,44 +273,7 @@ litellm_settings:
  context_window_fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}] # fallback to gpt-3.5-turbo-16k if context window error
  allowed_fails: 3 # cooldown model if it fails > 3 calls in a minute.
```

**Set dynamically**

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "zephyr-beta",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ],
    "fallbacks": [{"zephyr-beta": ["gpt-3.5-turbo"]}],
    "context_window_fallbacks": [{"zephyr-beta": ["gpt-3.5-turbo"]}],
    "num_retries": 2,
    "timeout": 10
}'
```

### Test it!

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data-raw '{
    "model": "zephyr-beta", # 👈 MODEL NAME to fallback from
    "messages": [
        {"role": "user", "content": "what color is red"}
    ],
    "mock_testing_fallbacks": true
}'
```

## Advanced - Context Window Fallbacks (Pre-Call Checks + Fallbacks)
### Context Window Fallbacks (Pre-Call Checks + Fallbacks)

**Before call is made** check if a call is within model context window with **`enable_pre_call_checks: true`**.

@ -287,7 +409,7 @@ print(response)
</Tabs>

## Advanced - EU-Region Filtering (Pre-Call Checks)
### EU-Region Filtering (Pre-Call Checks)

**Before call is made** check if a call is within model context window with **`enable_pre_call_checks: true`**.

@ -350,7 +472,7 @@ print(response)
print(response.headers.get('x-litellm-model-api-base'))
```

## Advanced - Custom Timeouts, Stream Timeouts - Per Model
### Custom Timeouts, Stream Timeouts - Per Model
For each model you can set `timeout` & `stream_timeout` under `litellm_params`
```yaml
model_list:

@ -379,7 +501,7 @@ $ litellm --config /path/to/config.yaml
```

## Advanced - Setting Dynamic Timeouts - Per Request
### Setting Dynamic Timeouts - Per Request

LiteLLM Proxy supports setting a `timeout` per request
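
A minimal sketch of a per-request timeout from the OpenAI SDK, assuming the proxy reads a `timeout` field from the request body (the value in seconds is chosen arbitrarily here):

```python
import openai

client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "quick test"}],
    extra_body={"timeout": 2},  # assumed request-body field, per the section above
)
print(response)
```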

@ -77,6 +77,28 @@ litellm_settings:

#### Step 2: Setup Oauth Client
<Tabs>
<TabItem value="okta" label="Okta SSO">

1. Add Okta credentials to your .env

```bash
GENERIC_CLIENT_ID = "<your-okta-client-id>"
GENERIC_CLIENT_SECRET = "<your-okta-client-secret>"
GENERIC_AUTHORIZATION_ENDPOINT = "<your-okta-domain>/authorize" # https://dev-2kqkcd6lx6kdkuzt.us.auth0.com/authorize
GENERIC_TOKEN_ENDPOINT = "<your-okta-domain>/token" # https://dev-2kqkcd6lx6kdkuzt.us.auth0.com/oauth/token
GENERIC_USERINFO_ENDPOINT = "<your-okta-domain>/userinfo" # https://dev-2kqkcd6lx6kdkuzt.us.auth0.com/userinfo
```

You can get your domain-specific auth/token/userinfo endpoints at `<YOUR-OKTA-DOMAIN>/.well-known/openid-configuration`
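
For instance, you can discover those endpoints programmatically from the standard OIDC discovery document. A minimal sketch (the domain is a placeholder):

```python
import json
import urllib.request

domain = "https://<your-okta-domain>"  # placeholder

# Standard OpenID Connect discovery document
with urllib.request.urlopen(f"{domain}/.well-known/openid-configuration") as resp:
    config = json.load(resp)

print(config["authorization_endpoint"])  # -> GENERIC_AUTHORIZATION_ENDPOINT
print(config["token_endpoint"])          # -> GENERIC_TOKEN_ENDPOINT
print(config["userinfo_endpoint"])       # -> GENERIC_USERINFO_ENDPOINT
```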

2. Add proxy url as callback_url on Okta

On Okta, add the 'callback_url' as `<proxy_base_url>/sso/callback`

<Image img={require('../../img/okta_callback_url.png')} />

</TabItem>
<TabItem value="google" label="Google SSO">

- Create a new Oauth 2.0 Client on https://console.cloud.google.com/

@ -115,7 +137,6 @@ MICROSOFT_TENANT="5a39737

</TabItem>

<TabItem value="Generic" label="Generic SSO Provider">

A generic OAuth client that can be used to quickly create support for any OAuth provider with close to no code
@ -63,7 +63,7 @@ You can:
- Add budgets to Teams

#### **Add budgets to users**
#### **Add budgets to teams**
```shell
curl --location 'http://localhost:4000/team/new' \
--header 'Authorization: Bearer <your-master-key>' \

@ -102,6 +102,22 @@ curl --location 'http://localhost:4000/team/new' \
    "budget_reset_at": null
}
```

#### **Add budget duration to teams**

`budget_duration`: Budget is reset at the end of the specified duration. If not set, the budget is never reset. You can set the duration as seconds ("30s"), minutes ("30m"), hours ("30h"), or days ("30d").

```
curl 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
    "team_alias": "my-new-team_4",
    "members_with_roles": [{"role": "admin", "user_id": "5c4a0aa3-a1e1-43dc-bd87-3c2da8382a3a"}],
    "budget_duration": "10s"
}'
```

</TabItem>
<TabItem value="per-team-member" label="For Team Members">
BIN docs/my-website/img/custom_root_path.png Normal file. Binary file not shown. After Width: | Height: | Size: 151 KiB
BIN docs/my-website/img/okta_callback_url.png Normal file. Binary file not shown. After Width: | Height: | Size: 279 KiB
BIN docs/my-website/img/otel_parent.png Normal file. Binary file not shown. After Width: | Height: | Size: 200 KiB
BIN docs/my-website/img/raw_request_log.png Normal file. Binary file not shown. After Width: | Height: | Size: 168 KiB
@ -183,6 +183,7 @@ const sidebars = {
      label: "Logging & Observability",
      items: [
        "debugging/local_debugging",
        "observability/raw_request_response",
        "observability/callbacks",
        "observability/custom_callback",
        "observability/langfuse_integration",

@ -256,6 +257,7 @@ const sidebars = {
        "projects/GPT Migrate",
        "projects/YiVal",
        "projects/LiteLLM Proxy",
        "projects/llm_cord",
      ],
    },
  ],
@ -60,6 +60,7 @@ _async_failure_callback: List[Callable] = (
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False
log_raw_request_response: bool = False
redact_messages_in_exceptions: Optional[bool] = False
store_audit_logs = False  # Enterprise feature, allow users to see audit logs
## end of callbacks #############

@ -407,6 +408,7 @@ openai_compatible_providers: List = [
    "together_ai",
    "fireworks_ai",
    "friendliai",
    "azure_ai",
]

@ -611,6 +613,7 @@ provider_list: List = [
    "baseten",
    "azure",
    "azure_text",
    "azure_ai",
    "sagemaker",
    "bedrock",
    "vllm",

@ -765,7 +768,7 @@ from .llms.gemini import GeminiConfig
from .llms.nlp_cloud import NLPCloudConfig
from .llms.aleph_alpha import AlephAlphaConfig
from .llms.petals import PetalsConfig
from .llms.vertex_ai import VertexAIConfig
from .llms.vertex_ai import VertexAIConfig, VertexAITextEmbeddingConfig
from .llms.vertex_ai_anthropic import VertexAIAnthropicConfig
from .llms.sagemaker import SagemakerConfig
from .llms.ollama import OllamaConfig

@ -787,6 +790,7 @@ from .llms.openai import (
    OpenAIConfig,
    OpenAITextCompletionConfig,
    MistralConfig,
    MistralEmbeddingConfig,
    DeepInfraConfig,
)
from .llms.azure import (

@ -337,8 +337,6 @@ def response_cost_calculator(
        and custom_llm_provider is True
    ):  # override defaults if custom pricing is set
        base_model = model
    elif base_model is None:
        base_model = model
    # base_model defaults to None if not set on model_info
    response_cost = completion_cost(
        completion_response=response_object,
@ -337,6 +337,7 @@ class ContextWindowExceededError(BadRequestError):  # type: ignore
            model=self.model,  # type: ignore
            llm_provider=self.llm_provider,  # type: ignore
            response=response,
            litellm_debug_info=self.litellm_debug_info,
        )  # Call the base class constructor with the parameters it needs

    def __str__(self):

@ -379,6 +380,7 @@ class RejectedRequestError(BadRequestError):  # type: ignore
            model=self.model,  # type: ignore
            llm_provider=self.llm_provider,  # type: ignore
            response=response,
            litellm_debug_info=self.litellm_debug_info,
        )  # Call the base class constructor with the parameters it needs

    def __str__(self):

@ -418,6 +420,7 @@ class ContentPolicyViolationError(BadRequestError):  # type: ignore
            model=self.model,  # type: ignore
            llm_provider=self.llm_provider,  # type: ignore
            response=response,
            litellm_debug_info=self.litellm_debug_info,
        )  # Call the base class constructor with the parameters it needs

    def __str__(self):
@@ -6,17 +6,23 @@ import litellm
from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_logger
from litellm.types.services import ServiceLoggerPayload
from functools import wraps
from typing import Union, Optional, TYPE_CHECKING, Any

if TYPE_CHECKING:
    from opentelemetry.trace import Span as _Span
    from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth
    from litellm.proxy._types import (
        ManagementEndpointLoggingPayload as _ManagementEndpointLoggingPayload,
    )

    Span = _Span
    UserAPIKeyAuth = _UserAPIKeyAuth
    ManagementEndpointLoggingPayload = _ManagementEndpointLoggingPayload
else:
    Span = Any
    UserAPIKeyAuth = Any
    ManagementEndpointLoggingPayload = Any


LITELLM_TRACER_NAME = os.getenv("OTEL_TRACER_NAME", "litellm")

@@ -247,7 +253,7 @@ class OpenTelemetry(CustomLogger):
        span.end(end_time=self._to_ns(end_time))

    def set_tools_attributes(self, span: Span, tools):
-       from opentelemetry.semconv.ai import SpanAttributes
        from litellm.proxy._types import SpanAttributes
        import json

        if not tools:

@@ -272,7 +278,7 @@ class OpenTelemetry(CustomLogger):
            pass

    def set_attributes(self, span: Span, kwargs, response_obj):
-       from opentelemetry.semconv.ai import SpanAttributes
        from litellm.proxy._types import SpanAttributes

        optional_params = kwargs.get("optional_params", {})
        litellm_params = kwargs.get("litellm_params", {}) or {}

@@ -407,7 +413,7 @@ class OpenTelemetry(CustomLogger):
        )

    def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
-       from opentelemetry.semconv.ai import SpanAttributes
        from litellm.proxy._types import SpanAttributes

        optional_params = kwargs.get("optional_params", {})
        litellm_params = kwargs.get("litellm_params", {}) or {}

@@ -454,6 +460,23 @@ class OpenTelemetry(CustomLogger):
    def _get_span_name(self, kwargs):
        return LITELLM_REQUEST_SPAN_NAME

    def get_traceparent_from_header(self, headers):
        if headers is None:
            return None
        _traceparent = headers.get("traceparent", None)
        if _traceparent is None:
            return None

        from opentelemetry.trace.propagation.tracecontext import (
            TraceContextTextMapPropagator,
        )

        verbose_logger.debug("OpenTelemetry: GOT A TRACEPARENT {}".format(_traceparent))
        propagator = TraceContextTextMapPropagator()
        _parent_context = propagator.extract(carrier={"traceparent": _traceparent})
        verbose_logger.debug("OpenTelemetry: PARENT CONTEXT {}".format(_parent_context))
        return _parent_context

    def _get_span_context(self, kwargs):
        from opentelemetry.trace.propagation.tracecontext import (
            TraceContextTextMapPropagator,
        )

@@ -545,3 +568,91 @@ class OpenTelemetry(CustomLogger):
            self.OTEL_EXPORTER,
        )
        return BatchSpanProcessor(ConsoleSpanExporter())

    async def async_management_endpoint_success_hook(
        self,
        logging_payload: ManagementEndpointLoggingPayload,
        parent_otel_span: Optional[Span] = None,
    ):
        from opentelemetry import trace
        from datetime import datetime
        from opentelemetry.trace import Status, StatusCode

        _start_time_ns = logging_payload.start_time
        _end_time_ns = logging_payload.end_time

        start_time = logging_payload.start_time
        end_time = logging_payload.end_time

        if isinstance(start_time, float):
            _start_time_ns = int(int(start_time) * 1e9)
        else:
            _start_time_ns = self._to_ns(start_time)

        if isinstance(end_time, float):
            _end_time_ns = int(int(end_time) * 1e9)
        else:
            _end_time_ns = self._to_ns(end_time)

        if parent_otel_span is not None:
            _span_name = logging_payload.route
            management_endpoint_span = self.tracer.start_span(
                name=_span_name,
                context=trace.set_span_in_context(parent_otel_span),
                start_time=_start_time_ns,
            )

            _request_data = logging_payload.request_data
            if _request_data is not None:
                for key, value in _request_data.items():
                    management_endpoint_span.set_attribute(f"request.{key}", value)

            _response = logging_payload.response
            if _response is not None:
                for key, value in _response.items():
                    management_endpoint_span.set_attribute(f"response.{key}", value)
            management_endpoint_span.set_status(Status(StatusCode.OK))
            management_endpoint_span.end(end_time=_end_time_ns)

    async def async_management_endpoint_failure_hook(
        self,
        logging_payload: ManagementEndpointLoggingPayload,
        parent_otel_span: Optional[Span] = None,
    ):
        from opentelemetry import trace
        from datetime import datetime
        from opentelemetry.trace import Status, StatusCode

        _start_time_ns = logging_payload.start_time
        _end_time_ns = logging_payload.end_time

        start_time = logging_payload.start_time
        end_time = logging_payload.end_time

        if isinstance(start_time, float):
            _start_time_ns = int(int(start_time) * 1e9)
        else:
            _start_time_ns = self._to_ns(start_time)

        if isinstance(end_time, float):
            _end_time_ns = int(int(end_time) * 1e9)
        else:
            _end_time_ns = self._to_ns(end_time)

        if parent_otel_span is not None:
            _span_name = logging_payload.route
            management_endpoint_span = self.tracer.start_span(
                name=_span_name,
                context=trace.set_span_in_context(parent_otel_span),
                start_time=_start_time_ns,
            )

            _request_data = logging_payload.request_data
            if _request_data is not None:
                for key, value in _request_data.items():
                    management_endpoint_span.set_attribute(f"request.{key}", value)

            _exception = logging_payload.exception
            management_endpoint_span.set_attribute(f"exception", str(_exception))
            management_endpoint_span.set_status(Status(StatusCode.ERROR))
            management_endpoint_span.end(end_time=_end_time_ns)
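`get_traceparent_from_header` is new; it relies on the standard W3C trace-context propagator, so any caller that sends a `traceparent` header can have its trace continued by the proxy. A minimal standalone sketch of the extraction step, with example header values:

```python
from opentelemetry.trace.propagation.tracecontext import (
    TraceContextTextMapPropagator,
)

# A W3C traceparent header: version-trace_id-span_id-flags (example values).
headers = {"traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01"}

propagator = TraceContextTextMapPropagator()
parent_context = propagator.extract(carrier=headers)
# Spans started with context=parent_context join the caller's trace, which is
# how get_traceparent_from_header lets the proxy link into upstream traces.
```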
@@ -36,6 +36,9 @@ from ..types.llms.openai import (
    AsyncAssistantStreamManager,
    AssistantStreamManager,
)
from litellm.caching import DualCache

azure_ad_cache = DualCache()


class AzureOpenAIError(Exception):

@@ -309,9 +312,10 @@ def select_azure_base_url_or_endpoint(azure_client_params: dict):

def get_azure_ad_token_from_oidc(azure_ad_token: str):
    azure_client_id = os.getenv("AZURE_CLIENT_ID", None)
-   azure_tenant = os.getenv("AZURE_TENANT_ID", None)
    azure_tenant_id = os.getenv("AZURE_TENANT_ID", None)
    azure_authority_host = os.getenv("AZURE_AUTHORITY_HOST", "https://login.microsoftonline.com")

-   if azure_client_id is None or azure_tenant is None:
    if azure_client_id is None or azure_tenant_id is None:
        raise AzureOpenAIError(
            status_code=422,
            message="AZURE_CLIENT_ID and AZURE_TENANT_ID must be set",

@@ -325,8 +329,19 @@ def get_azure_ad_token_from_oidc(azure_ad_token: str):
            message="OIDC token could not be retrieved from secret manager.",
        )

    azure_ad_token_cache_key = json.dumps({
        "azure_client_id": azure_client_id,
        "azure_tenant_id": azure_tenant_id,
        "azure_authority_host": azure_authority_host,
        "oidc_token": oidc_token,
    })

    azure_ad_token_access_token = azure_ad_cache.get_cache(azure_ad_token_cache_key)
    if azure_ad_token_access_token is not None:
        return azure_ad_token_access_token

    req_token = httpx.post(
-       f"https://login.microsoftonline.com/{azure_tenant}/oauth2/v2.0/token",
        f"{azure_authority_host}/{azure_tenant_id}/oauth2/v2.0/token",
        data={
            "client_id": azure_client_id,
            "grant_type": "client_credentials",

@@ -342,12 +357,23 @@ def get_azure_ad_token_from_oidc(azure_ad_token: str):
            message=req_token.text,
        )

-   possible_azure_ad_token = req_token.json().get("access_token", None)
    azure_ad_token_json = req_token.json()
    azure_ad_token_access_token = azure_ad_token_json.get("access_token", None)
    azure_ad_token_expires_in = azure_ad_token_json.get("expires_in", None)

-   if possible_azure_ad_token is None:
-       raise AzureOpenAIError(status_code=422, message="Azure AD Token not returned")
    if azure_ad_token_access_token is None:
        raise AzureOpenAIError(
            status_code=422, message="Azure AD Token access_token not returned"
        )

-   return possible_azure_ad_token
    if azure_ad_token_expires_in is None:
        raise AzureOpenAIError(
            status_code=422, message="Azure AD Token expires_in not returned"
        )

    azure_ad_cache.set_cache(key=azure_ad_token_cache_key, value=azure_ad_token_access_token, ttl=azure_ad_token_expires_in)

    return azure_ad_token_access_token


class AzureChatCompletion(BaseLLM):
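The change above caches the Azure AD access token in a `DualCache`, keyed on every input that influences the token and expiring with the token's own `expires_in`. A minimal sketch of the same pattern, with a plain dict standing in for `DualCache`:

```python
import json
import time

_token_cache: dict = {}

def get_cached_token(client_id: str, tenant_id: str, fetch) -> str:
    # Sketch of the caching pattern above; fetch() stands in for the
    # httpx.post() token request and returns (token, expires_in).
    cache_key = json.dumps({"client_id": client_id, "tenant_id": tenant_id})
    hit = _token_cache.get(cache_key)
    if hit is not None and hit["expires_at"] > time.time():
        return hit["token"]
    token, expires_in = fetch()
    _token_cache[cache_key] = {"token": token, "expires_at": time.time() + expires_in}
    return token
```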
@@ -51,8 +51,11 @@ from litellm.types.llms.openai import (
    ChatCompletionResponseMessage,
    ChatCompletionToolCallChunk,
    ChatCompletionToolCallFunctionChunk,
    ChatCompletionDeltaChunk,
)
from litellm.caching import DualCache

iam_cache = DualCache()


class AmazonCohereChatConfig:
    """

@@ -324,38 +327,53 @@ class BedrockLLM(BaseLLM):
        ) = params_to_check

        ### CHECK STS ###
-       if (
-           aws_web_identity_token is not None
-           and aws_role_name is not None
-           and aws_session_name is not None
-       ):
-           oidc_token = get_secret(aws_web_identity_token)
        if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
            iam_creds_cache_key = json.dumps({
                "aws_web_identity_token": aws_web_identity_token,
                "aws_role_name": aws_role_name,
                "aws_session_name": aws_session_name,
                "aws_region_name": aws_region_name,
            })

-           if oidc_token is None:
-               raise BedrockError(
-                   message="OIDC token could not be retrieved from secret manager.",
-                   status_code=401,
-               )
            iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
            if iam_creds_dict is None:
                oidc_token = get_secret(aws_web_identity_token)

                if oidc_token is None:
                    raise BedrockError(
                        message="OIDC token could not be retrieved from secret manager.",
                        status_code=401,
                    )

                sts_client = boto3.client(
                    "sts",
                    region_name=aws_region_name,
                    endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com"
                )

-           sts_client = boto3.client("sts")
-           # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
-           # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
-           sts_response = sts_client.assume_role_with_web_identity(
-               RoleArn=aws_role_name,
-               RoleSessionName=aws_session_name,
-               WebIdentityToken=oidc_token,
-               DurationSeconds=3600,
-           )
                # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
                # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
                sts_response = sts_client.assume_role_with_web_identity(
                    RoleArn=aws_role_name,
                    RoleSessionName=aws_session_name,
                    WebIdentityToken=oidc_token,
                    DurationSeconds=3600,
                )
                iam_creds_dict = {
                    "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
                    "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
                    "aws_session_token": sts_response["Credentials"]["SessionToken"],
                    "region_name": aws_region_name,
                }

-           session = boto3.Session(
-               aws_access_key_id=sts_response["Credentials"]["AccessKeyId"],
-               aws_secret_access_key=sts_response["Credentials"]["SecretAccessKey"],
-               aws_session_token=sts_response["Credentials"]["SessionToken"],
-               region_name=aws_region_name,
-           )
                iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)

-           return session.get_credentials()
            session = boto3.Session(**iam_creds_dict)

            iam_creds = session.get_credentials()

            return iam_creds
        elif aws_role_name is not None and aws_session_name is not None:
            sts_client = boto3.client(
                "sts",
@@ -1415,38 +1433,53 @@ class BedrockConverseLLM(BaseLLM):
        ) = params_to_check

        ### CHECK STS ###
-       if (
-           aws_web_identity_token is not None
-           and aws_role_name is not None
-           and aws_session_name is not None
-       ):
-           oidc_token = get_secret(aws_web_identity_token)
        if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
            iam_creds_cache_key = json.dumps({
                "aws_web_identity_token": aws_web_identity_token,
                "aws_role_name": aws_role_name,
                "aws_session_name": aws_session_name,
                "aws_region_name": aws_region_name,
            })

-           if oidc_token is None:
-               raise BedrockError(
-                   message="OIDC token could not be retrieved from secret manager.",
-                   status_code=401,
-               )
            iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key)
            if iam_creds_dict is None:
                oidc_token = get_secret(aws_web_identity_token)

                if oidc_token is None:
                    raise BedrockError(
                        message="OIDC token could not be retrieved from secret manager.",
                        status_code=401,
                    )

                sts_client = boto3.client(
                    "sts",
                    region_name=aws_region_name,
                    endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com"
                )

-           sts_client = boto3.client("sts")
-           # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
-           # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
-           sts_response = sts_client.assume_role_with_web_identity(
-               RoleArn=aws_role_name,
-               RoleSessionName=aws_session_name,
-               WebIdentityToken=oidc_token,
-               DurationSeconds=3600,
-           )
                # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
                # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
                sts_response = sts_client.assume_role_with_web_identity(
                    RoleArn=aws_role_name,
                    RoleSessionName=aws_session_name,
                    WebIdentityToken=oidc_token,
                    DurationSeconds=3600,
                )
                iam_creds_dict = {
                    "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"],
                    "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"],
                    "aws_session_token": sts_response["Credentials"]["SessionToken"],
                    "region_name": aws_region_name,
                }

-           session = boto3.Session(
-               aws_access_key_id=sts_response["Credentials"]["AccessKeyId"],
-               aws_secret_access_key=sts_response["Credentials"]["SecretAccessKey"],
-               aws_session_token=sts_response["Credentials"]["SessionToken"],
-               region_name=aws_region_name,
-           )
                iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60)

-           return session.get_credentials()
            session = boto3.Session(**iam_creds_dict)

            iam_creds = session.get_credentials()

            return iam_creds
        elif aws_role_name is not None and aws_session_name is not None:
            sts_client = boto3.client(
                "sts",
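Both Bedrock clients now apply the same caching idea to assumed-role credentials: the cache key is a JSON dump of the identifying inputs, and the TTL of `3600 - 60` seconds lets cached credentials lapse a minute before the one-hour STS session they came from. A sketch of the key and TTL construction in isolation, with placeholder values:

```python
import json

# Same key construction as iam_creds_cache_key above: every input that can
# change the resulting credentials is part of the key.
iam_creds_cache_key = json.dumps({
    "aws_web_identity_token": "env-or-secret-ref",  # placeholder
    "aws_role_name": "arn:aws:iam::123456789012:role/example",  # placeholder
    "aws_session_name": "litellm-session",
    "aws_region_name": "us-west-2",
})

# Cached credentials expire a safe margin before the STS session does.
STS_SESSION_SECONDS = 3600
CACHE_TTL_SECONDS = STS_SESSION_SECONDS - 60  # 3540
```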
@@ -1859,29 +1892,59 @@ class AWSEventStreamDecoder:
        self.parser = EventStreamJSONParser()

    def converse_chunk_parser(self, chunk_data: dict) -> GenericStreamingChunk:
-       text = ""
-       tool_str = ""
-       is_finished = False
-       finish_reason = ""
-       usage: Optional[ConverseTokenUsageBlock] = None
-       if "delta" in chunk_data:
-           delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
-           if "text" in delta_obj:
-               text = delta_obj["text"]
-           elif "toolUse" in delta_obj:
-               tool_str = delta_obj["toolUse"]["input"]
-       elif "stopReason" in chunk_data:
-           finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
-       elif "usage" in chunk_data:
-           usage = ConverseTokenUsageBlock(**chunk_data["usage"])  # type: ignore
-       response = GenericStreamingChunk(
-           text=text,
-           tool_str=tool_str,
-           is_finished=is_finished,
-           finish_reason=finish_reason,
-           usage=usage,
-       )
-       return response
        try:
            text = ""
            tool_use: Optional[ChatCompletionToolCallChunk] = None
            is_finished = False
            finish_reason = ""
            usage: Optional[ConverseTokenUsageBlock] = None

            index = int(chunk_data.get("contentBlockIndex", 0))
            if "start" in chunk_data:
                start_obj = ContentBlockStartEvent(**chunk_data["start"])
                if (
                    start_obj is not None
                    and "toolUse" in start_obj
                    and start_obj["toolUse"] is not None
                ):
                    tool_use = {
                        "id": start_obj["toolUse"]["toolUseId"],
                        "type": "function",
                        "function": {
                            "name": start_obj["toolUse"]["name"],
                            "arguments": "",
                        },
                    }
            elif "delta" in chunk_data:
                delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
                if "text" in delta_obj:
                    text = delta_obj["text"]
                elif "toolUse" in delta_obj:
                    tool_use = {
                        "id": None,
                        "type": "function",
                        "function": {
                            "name": None,
                            "arguments": delta_obj["toolUse"]["input"],
                        },
                    }
            elif "stopReason" in chunk_data:
                finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
                is_finished = True
            elif "usage" in chunk_data:
                usage = ConverseTokenUsageBlock(**chunk_data["usage"])  # type: ignore

            response = GenericStreamingChunk(
                text=text,
                tool_use=tool_use,
                is_finished=is_finished,
                finish_reason=finish_reason,
                usage=usage,
                index=index,
            )
            return response
        except Exception as e:
            raise Exception("Received streaming error - {}".format(str(e)))

    def _chunk_parser(self, chunk_data: dict) -> GenericStreamingChunk:
        text = ""

@@ -1890,12 +1953,16 @@ class AWSEventStreamDecoder:
        if "outputText" in chunk_data:
            text = chunk_data["outputText"]
        # ai21 mapping
-       if "ai21" in self.model:  # fake ai21 streaming
        elif "ai21" in self.model:  # fake ai21 streaming
            text = chunk_data.get("completions")[0].get("data").get("text")  # type: ignore
            is_finished = True
            finish_reason = "stop"
        ######## bedrock.anthropic mappings ###############
-       elif "delta" in chunk_data:
        elif (
            "contentBlockIndex" in chunk_data
            or "stopReason" in chunk_data
            or "metrics" in chunk_data
        ):
            return self.converse_chunk_parser(chunk_data=chunk_data)
        ######## bedrock.mistral mappings ###############
        elif "outputs" in chunk_data:

@@ -1905,7 +1972,7 @@ class AWSEventStreamDecoder:
        ):
            text = chunk_data["outputs"][0]["text"]
            stop_reason = chunk_data.get("stop_reason", None)
-           if stop_reason != None:
            if stop_reason is not None:
                is_finished = True
                finish_reason = stop_reason
        ######## bedrock.cohere mappings ###############

@@ -1926,8 +1993,9 @@ class AWSEventStreamDecoder:
            text=text,
            is_finished=is_finished,
            finish_reason=finish_reason,
-           tool_str="",
            usage=None,
            index=0,
            tool_use=None,
        )

    def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[GenericStreamingChunk]:
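The rewritten `converse_chunk_parser` emits structured `tool_use` dicts instead of a bare `tool_str`. Illustrative converse-stream events (shapes paraphrased from the code above, not a full schema) and what the parser does with each:

```python
# Illustrative converse-stream events (placeholder values):
start_event = {
    "contentBlockIndex": 1,
    "start": {"toolUse": {"toolUseId": "tool_1", "name": "get_weather"}},
}
# -> tool_use = {"id": "tool_1", "type": "function",
#                "function": {"name": "get_weather", "arguments": ""}}

delta_event = {
    "contentBlockIndex": 1,
    "delta": {"toolUse": {"input": '{"city": "Paris"}'}},
}
# -> tool_use carries just the argument fragment; id and name stay None so
#    downstream code can merge fragments into the chunk that opened the call.

stop_event = {"stopReason": "tool_use"}
# -> finish_reason = map_finish_reason("tool_use"), is_finished = True
```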
@@ -139,6 +139,7 @@ def process_response(

def convert_model_to_url(model: str, api_base: str):
    user_id, app_id, model_id = model.split(".")
    model_id = model_id.lower()
    return f"{api_base}/users/{user_id}/apps/{app_id}/models/{model_id}/outputs"


@@ -171,19 +172,55 @@ async def async_completion(

    async_handler = AsyncHTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
    response = await async_handler.post(
-       api_base, headers=headers, data=json.dumps(data)
        url=model, headers=headers, data=json.dumps(data)
    )

-   return process_response(
-       model=model,
-       prompt=prompt,
-       response=response,
-       model_response=model_response,
-       data=data,
-       encoding=encoding,
-       logging_obj=logging_obj,
-   )
    logging_obj.post_call(
        input=prompt,
        api_key=api_key,
        original_response=response.text,
        additional_args={"complete_input_dict": data},
    )
    ## RESPONSE OBJECT
    try:
        completion_response = response.json()
    except Exception:
        raise ClarifaiError(
            message=response.text, status_code=response.status_code, url=model
        )
    # print(completion_response)
    try:
        choices_list = []
        for idx, item in enumerate(completion_response["outputs"]):
            if len(item["data"]["text"]["raw"]) > 0:
                message_obj = Message(content=item["data"]["text"]["raw"])
            else:
                message_obj = Message(content=None)
            choice_obj = Choices(
                finish_reason="stop",
                index=idx + 1,  # check
                message=message_obj,
            )
            choices_list.append(choice_obj)
        model_response["choices"] = choices_list

    except Exception as e:
        raise ClarifaiError(
            message=traceback.format_exc(), status_code=response.status_code, url=model
        )

    # Calculate Usage
    prompt_tokens = len(encoding.encode(prompt))
    completion_tokens = len(
        encoding.encode(model_response["choices"][0]["message"].get("content"))
    )
    model_response["model"] = model
    model_response["usage"] = Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    return model_response


def completion(

@@ -241,7 +278,7 @@ def completion(
        additional_args={
            "complete_input_dict": data,
            "headers": headers,
-           "api_base": api_base,
            "api_base": model,
        },
    )
    if acompletion == True:
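`convert_model_to_url` now lowercases the model id segment before building the request URL. Worked example with placeholder Clarifai ids:

```python
api_base = "https://api.clarifai.com/v2"
model = "openai.chat-completion.GPT-4"  # user_id.app_id.model_id (placeholder)

user_id, app_id, model_id = model.split(".")
model_id = model_id.lower()
url = f"{api_base}/users/{user_id}/apps/{app_id}/models/{model_id}/outputs"
# -> https://api.clarifai.com/v2/users/openai/apps/chat-completion/models/gpt-4/outputs
```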
@@ -164,6 +164,49 @@ class MistralConfig:
        return optional_params


class MistralEmbeddingConfig:
    """
    Reference: https://docs.mistral.ai/api/#operation/createEmbedding
    """

    def __init__(
        self,
    ) -> None:
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def get_supported_openai_params(self):
        return [
            "encoding_format",
        ]

    def map_openai_params(self, non_default_params: dict, optional_params: dict):
        for param, value in non_default_params.items():
            if param == "encoding_format":
                optional_params["encoding_format"] = value
        return optional_params


class DeepInfraConfig:
    """
    Reference: https://deepinfra.com/docs/advanced/openai_api
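With `MistralEmbeddingConfig` registered, `encoding_format` is the one OpenAI embedding param that passes straight through on Mistral calls. A minimal sketch, assuming `MISTRAL_API_KEY` is set:

```python
import litellm

response = litellm.embedding(
    model="mistral/mistral-embed",
    input=["hello from litellm"],
    encoding_format="float",  # mapped through by MistralEmbeddingConfig
)
print(len(response.data[0]["embedding"]))
```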
litellm/llms/tokenizers/fb374d419588a4632f3f557e76b4b70aebbca790 (new file, 199998 lines)
File diff suppressed because it is too large
@@ -4,6 +4,7 @@ from enum import Enum
import requests  # type: ignore
import time
from typing import Callable, Optional, Union, List, Literal, Any
from pydantic import BaseModel
from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
import litellm, uuid
import httpx, inspect  # type: ignore

@@ -12,7 +13,12 @@ from litellm.llms.prompt_templates.factory import (
    convert_to_gemini_tool_call_result,
    convert_to_gemini_tool_call_invoke,
)
-from litellm.types.files import get_file_mime_type_for_file_type, get_file_type_from_extension, is_gemini_1_5_accepted_file_type, is_video_file_type
from litellm.types.files import (
    get_file_mime_type_for_file_type,
    get_file_type_from_extension,
    is_gemini_1_5_accepted_file_type,
    is_video_file_type,
)


class VertexAIError(Exception):
|
|||
# GCS URIs
|
||||
if "gs://" in image_url:
|
||||
# Figure out file type
|
||||
extension_with_dot = os.path.splitext(image_url)[-1] # Ex: ".png"
|
||||
extension = extension_with_dot[1:] # Ex: "png"
|
||||
extension_with_dot = os.path.splitext(image_url)[-1] # Ex: ".png"
|
||||
extension = extension_with_dot[1:] # Ex: "png"
|
||||
|
||||
file_type = get_file_type_from_extension(extension)
|
||||
|
||||
# Validate the file type is supported by Gemini
|
||||
if not is_gemini_1_5_accepted_file_type(file_type):
|
||||
raise Exception(f"File type not supported by gemini - {file_type}")
|
||||
|
||||
|
||||
mime_type = get_file_mime_type_for_file_type(file_type)
|
||||
file_data = FileDataType(mime_type=mime_type, file_uri=image_url)
|
||||
|
||||
|
@ -320,7 +326,7 @@ def _process_gemini_image(image_url: str) -> PartType:
|
|||
image = _load_image_from_url(image_url)
|
||||
_blob = BlobType(data=image.data, mime_type=image._mime_type)
|
||||
return PartType(inline_data=_blob)
|
||||
|
||||
|
||||
# Base64 encoding
|
||||
elif "base64" in image_url:
|
||||
import base64, re
|
||||
|
@ -1293,6 +1299,95 @@ async def async_streaming(
|
|||
return streamwrapper
|
||||
|
||||
|
||||
class VertexAITextEmbeddingConfig(BaseModel):
|
||||
"""
|
||||
Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#TextEmbeddingInput
|
||||
|
||||
Args:
|
||||
auto_truncate: Optional(bool) If True, will truncate input text to fit within the model's max input length.
|
||||
task_type: Optional(str) The type of task to be performed. The default is "RETRIEVAL_QUERY".
|
||||
title: Optional(str) The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
|
||||
"""
|
||||
|
||||
auto_truncate: Optional[bool] = None
|
||||
task_type: Optional[
|
||||
Literal[
|
||||
"RETRIEVAL_QUERY",
|
||||
"RETRIEVAL_DOCUMENT",
|
||||
"SEMANTIC_SIMILARITY",
|
||||
"CLASSIFICATION",
|
||||
"CLUSTERING",
|
||||
"QUESTION_ANSWERING",
|
||||
"FACT_VERIFICATION",
|
||||
]
|
||||
] = None
|
||||
title: Optional[str] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
auto_truncate: Optional[bool] = None,
|
||||
task_type: Optional[
|
||||
Literal[
|
||||
"RETRIEVAL_QUERY",
|
||||
"RETRIEVAL_DOCUMENT",
|
||||
"SEMANTIC_SIMILARITY",
|
||||
"CLASSIFICATION",
|
||||
"CLUSTERING",
|
||||
"QUESTION_ANSWERING",
|
||||
"FACT_VERIFICATION",
|
||||
]
|
||||
] = None,
|
||||
title: Optional[str] = None,
|
||||
) -> None:
|
||||
locals_ = locals()
|
||||
for key, value in locals_.items():
|
||||
if key != "self" and value is not None:
|
||||
setattr(self.__class__, key, value)
|
||||
|
||||
@classmethod
|
||||
def get_config(cls):
|
||||
return {
|
||||
k: v
|
||||
for k, v in cls.__dict__.items()
|
||||
if not k.startswith("__")
|
||||
and not isinstance(
|
||||
v,
|
||||
(
|
||||
types.FunctionType,
|
||||
types.BuiltinFunctionType,
|
||||
classmethod,
|
||||
staticmethod,
|
||||
),
|
||||
)
|
||||
and v is not None
|
||||
}
|
||||
|
||||
def get_supported_openai_params(self):
|
||||
return [
|
||||
"dimensions",
|
||||
]
|
||||
|
||||
def map_openai_params(self, non_default_params: dict, optional_params: dict):
|
||||
for param, value in non_default_params.items():
|
||||
if param == "dimensions":
|
||||
optional_params["output_dimensionality"] = value
|
||||
return optional_params
|
||||
|
||||
def get_mapped_special_auth_params(self) -> dict:
|
||||
"""
|
||||
Common auth params across bedrock/vertex_ai/azure/watsonx
|
||||
"""
|
||||
return {"project": "vertex_project", "region_name": "vertex_location"}
|
||||
|
||||
def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
|
||||
mapped_params = self.get_mapped_special_auth_params()
|
||||
|
||||
for param, value in non_default_params.items():
|
||||
if param in mapped_params:
|
||||
optional_params[mapped_params[param]] = value
|
||||
return optional_params
|
||||
|
||||
|
||||
def embedding(
|
||||
model: str,
|
||||
input: Union[list, str],
|
||||
|
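`task_type` and `title` are popped from `optional_params` and wrapped into `TextEmbeddingInput` objects in the `embedding()` hunks below. A minimal caller-side sketch, assuming Vertex credentials are configured:

```python
import litellm

response = litellm.embedding(
    model="vertex_ai/text-embedding-004",
    input=["what is litellm?"],
    task_type="RETRIEVAL_QUERY",  # wrapped into TextEmbeddingInput downstream
)
```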
@@ -1316,7 +1411,7 @@ def embedding(
            message="vertexai import failed please run `pip install google-cloud-aiplatform`",
        )

-   from vertexai.language_models import TextEmbeddingModel
    from vertexai.language_models import TextEmbeddingModel, TextEmbeddingInput
    import google.auth  # type: ignore

    ## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744

@@ -1347,6 +1442,16 @@ def embedding(
    if isinstance(input, str):
        input = [input]

    if optional_params is not None and isinstance(optional_params, dict):
        if optional_params.get("task_type") or optional_params.get("title"):
            # if user passed task_type or title, cast to TextEmbeddingInput
            _task_type = optional_params.pop("task_type", None)
            _title = optional_params.pop("title", None)
            input = [
                TextEmbeddingInput(text=x, task_type=_task_type, title=_title)
                for x in input
            ]

    try:
        llm_model = TextEmbeddingModel.from_pretrained(model)
    except Exception as e:

@@ -1363,7 +1468,8 @@ def embedding(
            encoding=encoding,
        )

-   request_str = f"""embeddings = llm_model.get_embeddings({input})"""
    _input_dict = {"texts": input, **optional_params}
    request_str = f"""embeddings = llm_model.get_embeddings({_input_dict})"""
    ## LOGGING PRE-CALL
    logging_obj.pre_call(
        input=input,

@@ -1375,7 +1481,7 @@ def embedding(
    )

    try:
-       embeddings = llm_model.get_embeddings(input)
        embeddings = llm_model.get_embeddings(**_input_dict)
    except Exception as e:
        raise VertexAIError(status_code=500, message=str(e))

@@ -1383,6 +1489,7 @@ def embedding(
    logging_obj.post_call(input=input, api_key=None, original_response=embeddings)
    ## Populate OpenAI compliant dictionary
    embedding_response = []
    input_tokens: int = 0
    for idx, embedding in enumerate(embeddings):
        embedding_response.append(
            {

@@ -1391,14 +1498,10 @@ def embedding(
                "embedding": embedding.values,
            }
        )
        input_tokens += embedding.statistics.token_count
    model_response["object"] = "list"
    model_response["data"] = embedding_response
    model_response["model"] = model
-   input_tokens = 0
-
-   input_str = "".join(input)
-
-   input_tokens += len(encoding.encode(input_str))

    usage = Usage(
        prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens

@@ -1420,7 +1523,8 @@ async def async_embedding(
    """
    Async embedding implementation
    """
-   request_str = f"""embeddings = llm_model.get_embeddings({input})"""
    _input_dict = {"texts": input, **optional_params}
    request_str = f"""embeddings = llm_model.get_embeddings({_input_dict})"""
    ## LOGGING PRE-CALL
    logging_obj.pre_call(
        input=input,

@@ -1432,7 +1536,7 @@ async def async_embedding(
    )

    try:
-       embeddings = await client.get_embeddings_async(input)
        embeddings = await client.get_embeddings_async(**_input_dict)
    except Exception as e:
        raise VertexAIError(status_code=500, message=str(e))

@@ -1440,6 +1544,7 @@ async def async_embedding(
    logging_obj.post_call(input=input, api_key=None, original_response=embeddings)
    ## Populate OpenAI compliant dictionary
    embedding_response = []
    input_tokens: int = 0
    for idx, embedding in enumerate(embeddings):
        embedding_response.append(
            {

@@ -1448,18 +1553,13 @@ async def async_embedding(
                "embedding": embedding.values,
            }
        )
        input_tokens += embedding.statistics.token_count

    model_response["object"] = "list"
    model_response["data"] = embedding_response
    model_response["model"] = model
-   input_tokens = 0
-
-   input_str = "".join(input)
-
-   input_tokens += len(encoding.encode(input_str))

    usage = Usage(
        prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
    )
    model_response.usage = usage

    return model_response
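Two caller-visible effects of these hunks: `dimensions` maps to Vertex's `output_dimensionality` via `VertexAITextEmbeddingConfig.map_openai_params`, and prompt tokens are now summed from the SDK's `embedding.statistics.token_count` rather than re-encoding the input locally. A sketch, assuming Vertex credentials are configured:

```python
import litellm

response = litellm.embedding(
    model="vertex_ai/text-embedding-004",
    input=["shorter vectors"],
    dimensions=256,  # mapped to output_dimensionality by the config above
)
print(len(response.data[0]["embedding"]))  # expected: 256
print(response.usage.prompt_tokens)  # taken from Vertex's own token count
```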
@@ -11,10 +11,10 @@ import os, openai, sys, json, inspect, uuid, datetime, threading
from typing import Any, Literal, Union, BinaryIO
from typing_extensions import overload
from functools import partial

import dotenv, traceback, random, asyncio, time, contextvars
from copy import deepcopy
import httpx

import litellm
from ._logging import verbose_logger
from litellm import (  # type: ignore

@@ -335,6 +335,7 @@ async def acompletion(
        or custom_llm_provider == "predibase"
        or custom_llm_provider == "bedrock"
        or custom_llm_provider == "databricks"
        or custom_llm_provider == "clarifai"
        or custom_llm_provider in litellm.openai_compatible_providers
    ):  # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all.
        init_response = await loop.run_in_executor(None, func_with_context)
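Adding `clarifai` to this condition routes `acompletion` through the provider's new native `async_completion` path (added earlier in this diff) instead of a purely synchronous call. A minimal usage sketch with a placeholder model id, assuming `CLARIFAI_API_KEY` is set:

```python
import asyncio
import litellm

async def main():
    response = await litellm.acompletion(
        model="clarifai/openai.chat-completion.GPT-4",  # placeholder model id
        messages=[{"role": "user", "content": "Say hi"}],
    )
    print(response.choices[0].message.content)

asyncio.run(main())
```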
@@ -1387,6 +1387,26 @@
        "mode": "image_generation",
        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
    },
    "text-embedding-004": {
        "max_tokens": 3072,
        "max_input_tokens": 3072,
        "output_vector_size": 768,
        "input_cost_per_token": 0.00000000625,
        "output_cost_per_token": 0,
        "litellm_provider": "vertex_ai-embedding-models",
        "mode": "embedding",
        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
    },
    "text-multilingual-embedding-002": {
        "max_tokens": 2048,
        "max_input_tokens": 2048,
        "output_vector_size": 768,
        "input_cost_per_token": 0.00000000625,
        "output_cost_per_token": 0,
        "litellm_provider": "vertex_ai-embedding-models",
        "mode": "embedding",
        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
    },
    "textembedding-gecko": {
        "max_tokens": 3072,
        "max_input_tokens": 3072,
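Quick sanity check on the new embedding prices:

```python
# $0.00000000625 per input token, output tokens free for embedding models:
price_per_input_token = 0.00000000625
print(price_per_input_token * 1_000_000)  # 0.00625 -> 1M tokens costs about $0.006
```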
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long

@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[45980,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-17b0c91edd3a24fe.js\",\"931\",\"static/chunks/app/page-d61796ff0d3a8faf.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"tghLG7_IS7i5OkQJRvCIl\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[45980,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-17b0c91edd3a24fe.js\",\"931\",\"static/chunks/app/page-bd882aee817406ff.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"48nWsJi-LJrUlOLzcK-Yz\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[45980,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-17b0c91edd3a24fe.js","931","static/chunks/app/page-d61796ff0d3a8faf.js"],""]
3:I[45980,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-17b0c91edd3a24fe.js","931","static/chunks/app/page-bd882aee817406ff.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

@@ -2,6 +2,6 @@
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-17b0c91edd3a24fe.js","418","static/chunks/app/model_hub/page-4cb65c32467214b5.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

@@ -2,6 +2,6 @@
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-17b0c91edd3a24fe.js","461","static/chunks/app/onboarding/page-664c7288e11fff5a.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@@ -1,7 +1,12 @@
import json
import logging
from logging import Formatter
import sys
import os
from litellm import json_logs

# Set default log level to INFO
log_level = os.getenv("LITELLM_LOG", "INFO")
numeric_level: int = getattr(logging, log_level.upper())


class JsonFormatter(Formatter):

@@ -16,6 +21,14 @@ class JsonFormatter(Formatter):

logger = logging.root
handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter())
if json_logs:
    handler.setFormatter(JsonFormatter())
else:
    formatter = logging.Formatter(
        "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s",
        datefmt="%H:%M:%S",
    )

    handler.setFormatter(formatter)
logger.handlers = [handler]
logger.setLevel(logging.INFO)
logger.setLevel(numeric_level)
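A quick sketch of the effect of the new env-driven level (assuming this module-level setup runs on `import litellm`, as it does here):

```python
# Hypothetical usage: pick the root log level before importing litellm.
import os

os.environ["LITELLM_LOG"] = "DEBUG"  # any level name the logging module knows

import litellm  # the setup above now calls logger.setLevel(logging.DEBUG) instead of the INFO default
```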
@@ -719,6 +719,8 @@ class Member(LiteLLMBase):
    @model_validator(mode="before")
    @classmethod
    def check_user_info(cls, values):
        if not isinstance(values, dict):
            raise ValueError("input needs to be a dictionary")
        if values.get("user_id") is None and values.get("user_email") is None:
            raise ValueError("Either user id or user email must be provided")
        return values
@@ -757,9 +759,24 @@ class GlobalEndUsersSpend(LiteLLMBase):

class TeamMemberAddRequest(LiteLLMBase):
    team_id: str
    member: Member
    member: Union[List[Member], Member]
    max_budget_in_team: Optional[float] = None  # Users max budget within the team

    def __init__(self, **data):
        member_data = data.get("member")
        if isinstance(member_data, list):
            # If member is a list of dictionaries, convert each dictionary to a Member object
            members = [Member(**item) for item in member_data]
            # Replace member_data with the list of Member objects
            data["member"] = members
        elif isinstance(member_data, dict):
            # If member is a dictionary, convert it to a single Member object
            member = Member(**member_data)
            # Replace member_data with the single Member object
            data["member"] = member
        # Call the superclass __init__ method to initialize the object
        super().__init__(**data)


class TeamMemberDeleteRequest(LiteLLMBase):
    team_id: str
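A small sketch of what the widened `member` field accepts after this change (ids and emails are placeholders, and `Member`'s role/user_id/user_email fields are assumed from the validator above):

```python
from litellm.proxy._types import Member, TeamMemberAddRequest

# a single member passed as a dict is coerced to a Member by __init__
single = TeamMemberAddRequest(
    team_id="team-1",
    member={"role": "user", "user_id": "u-123"},
)
assert isinstance(single.member, Member)

# a list of dicts is coerced element-wise
batch = TeamMemberAddRequest(
    team_id="team-1",
    member=[
        {"role": "user", "user_id": "u-123"},
        {"role": "admin", "user_email": "admin@example.com"},
    ],
    max_budget_in_team=25.0,
)
assert all(isinstance(m, Member) for m in batch.member)
```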
@@ -1472,6 +1489,9 @@ class SpendLogsMetadata(TypedDict):
    user_api_key_team_id: Optional[str]
    user_api_key_user_id: Optional[str]
    user_api_key_team_alias: Optional[str]
    spend_logs_metadata: Optional[
        dict
    ]  # special param to log k,v pairs to spendlogs for a call


class SpendLogsPayload(TypedDict):
@@ -1496,3 +1516,60 @@ class SpendLogsPayload(TypedDict):
    request_tags: str  # json str
    team_id: Optional[str]
    end_user: Optional[str]


class SpanAttributes(str, enum.Enum):
    # Note: We've taken this from opentelemetry-semantic-conventions-ai
    # I chose to not add a new dependency to litellm for this

    # Semantic Conventions for LLM requests, this needs to be removed after
    # OpenTelemetry Semantic Conventions support Gen AI.
    # Issue at https://github.com/open-telemetry/opentelemetry-python/issues/3868
    # Refer to https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/llm-spans.md

    LLM_SYSTEM = "gen_ai.system"
    LLM_REQUEST_MODEL = "gen_ai.request.model"
    LLM_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
    LLM_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
    LLM_REQUEST_TOP_P = "gen_ai.request.top_p"
    LLM_PROMPTS = "gen_ai.prompt"
    LLM_COMPLETIONS = "gen_ai.completion"
    LLM_RESPONSE_MODEL = "gen_ai.response.model"
    LLM_USAGE_COMPLETION_TOKENS = "gen_ai.usage.completion_tokens"
    LLM_USAGE_PROMPT_TOKENS = "gen_ai.usage.prompt_tokens"
    LLM_TOKEN_TYPE = "gen_ai.token.type"
    # To be added
    # LLM_RESPONSE_FINISH_REASON = "gen_ai.response.finish_reasons"
    # LLM_RESPONSE_ID = "gen_ai.response.id"

    # LLM
    LLM_REQUEST_TYPE = "llm.request.type"
    LLM_USAGE_TOTAL_TOKENS = "llm.usage.total_tokens"
    LLM_USAGE_TOKEN_TYPE = "llm.usage.token_type"
    LLM_USER = "llm.user"
    LLM_HEADERS = "llm.headers"
    LLM_TOP_K = "llm.top_k"
    LLM_IS_STREAMING = "llm.is_streaming"
    LLM_FREQUENCY_PENALTY = "llm.frequency_penalty"
    LLM_PRESENCE_PENALTY = "llm.presence_penalty"
    LLM_CHAT_STOP_SEQUENCES = "llm.chat.stop_sequences"
    LLM_REQUEST_FUNCTIONS = "llm.request.functions"
    LLM_REQUEST_REPETITION_PENALTY = "llm.request.repetition_penalty"
    LLM_RESPONSE_FINISH_REASON = "llm.response.finish_reason"
    LLM_RESPONSE_STOP_REASON = "llm.response.stop_reason"
    LLM_CONTENT_COMPLETION_CHUNK = "llm.content.completion.chunk"

    # OpenAI
    LLM_OPENAI_RESPONSE_SYSTEM_FINGERPRINT = "gen_ai.openai.system_fingerprint"
    LLM_OPENAI_API_BASE = "gen_ai.openai.api_base"
    LLM_OPENAI_API_VERSION = "gen_ai.openai.api_version"
    LLM_OPENAI_API_TYPE = "gen_ai.openai.api_type"


class ManagementEndpointLoggingPayload(LiteLLMBase):
    route: str
    request_data: dict
    response: Optional[dict] = None
    exception: Optional[Any] = None
    start_time: Optional[datetime] = None
    end_time: Optional[datetime] = None
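For orientation, a hedged sketch of how these attribute names get attached to a span; the tracer setup is assumed and not part of this diff:

```python
from opentelemetry import trace

from litellm.proxy._types import SpanAttributes

tracer = trace.get_tracer(__name__)
with tracer.start_as_current_span("litellm_request") as span:
    # SpanAttributes is a str enum, so members can be used directly as attribute keys
    span.set_attribute(SpanAttributes.LLM_SYSTEM, "openai")
    span.set_attribute(SpanAttributes.LLM_REQUEST_MODEL, "gpt-3.5-turbo")
    span.set_attribute(SpanAttributes.LLM_USAGE_TOTAL_TOKENS, 42)
```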
@@ -151,8 +151,8 @@ def common_checks(
        and route != "/models"
    ):
        if global_proxy_spend > litellm.max_budget:
            raise Exception(
                f"ExceededBudget: LiteLLM Proxy has exceeded its budget. Current spend: {global_proxy_spend}; Max Budget: {litellm.max_budget}"
            raise litellm.BudgetExceededError(
                current_cost=global_proxy_spend, max_budget=litellm.max_budget
            )
    return True
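The switch to a typed error lets callers branch on budget failures instead of parsing message strings. A minimal sketch, assuming a proxy-side budget is configured:

```python
import litellm

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
    )
except litellm.BudgetExceededError as e:
    # structured fields replace the old "ExceededBudget: ..." string
    print(f"over budget: spent {e.current_cost} of {e.max_budget}")
```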
litellm/proxy/common_utils/http_parsing_utils.py (new file, 31 lines)
@@ -0,0 +1,31 @@
from typing import Optional
from fastapi import Request
import ast, json


async def _read_request_body(request: Optional[Request]) -> dict:
    """
    Asynchronous function to read the request body and parse it as JSON or literal data.

    Parameters:
    - request: The request object to read the body from

    Returns:
    - dict: Parsed request data as a dictionary
    """
    try:
        request_data: dict = {}
        if request is None:
            return request_data
        body = await request.body()

        if body == b"" or body is None:
            return request_data
        body_str = body.decode()
        try:
            request_data = ast.literal_eval(body_str)
        except:
            request_data = json.loads(body_str)
        return request_data
    except:
        return {}
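A usage sketch for the helper inside a FastAPI route (the endpoint name is illustrative):

```python
from fastapi import FastAPI, Request

from litellm.proxy.common_utils.http_parsing_utils import _read_request_body

app = FastAPI()


@app.post("/echo")
async def echo(request: Request):
    # returns {} for missing, empty, or unparseable bodies instead of raising
    data = await _read_request_body(request=request)
    return {"received": data}
```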
litellm/proxy/common_utils/management_endpoint_utils.py (new file, 90 lines)
@@ -0,0 +1,90 @@
from datetime import datetime
from functools import wraps
from litellm.proxy._types import UserAPIKeyAuth, ManagementEndpointLoggingPayload
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from fastapi import Request


def management_endpoint_wrapper(func):
    """
    This wrapper does the following:

    1. Log I/O, Exceptions to OTEL
    2. Create an Audit log for success calls
    """

    @wraps(func)
    async def wrapper(*args, **kwargs):
        start_time = datetime.now()

        try:
            result = await func(*args, **kwargs)
            end_time = datetime.now()

            if kwargs is None:
                kwargs = {}
            user_api_key_dict: UserAPIKeyAuth = (
                kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
            )
            parent_otel_span = user_api_key_dict.parent_otel_span
            if parent_otel_span is not None:
                from litellm.proxy.proxy_server import open_telemetry_logger

                if open_telemetry_logger is not None:
                    _http_request: Request = kwargs.get("http_request")

                    _route = _http_request.url.path
                    _request_body: dict = await _read_request_body(
                        request=_http_request
                    )
                    _response = dict(result) if result is not None else None

                    logging_payload = ManagementEndpointLoggingPayload(
                        route=_route,
                        request_data=_request_body,
                        response=_response,
                        start_time=start_time,
                        end_time=end_time,
                    )

                    await open_telemetry_logger.async_management_endpoint_success_hook(
                        logging_payload=logging_payload,
                        parent_otel_span=parent_otel_span,
                    )

            return result
        except Exception as e:
            end_time = datetime.now()

            if kwargs is None:
                kwargs = {}
            user_api_key_dict: UserAPIKeyAuth = (
                kwargs.get("user_api_key_dict") or UserAPIKeyAuth()
            )
            parent_otel_span = user_api_key_dict.parent_otel_span
            if parent_otel_span is not None:
                from litellm.proxy.proxy_server import open_telemetry_logger

                if open_telemetry_logger is not None:
                    _http_request: Request = kwargs.get("http_request")
                    _route = _http_request.url.path
                    _request_body: dict = await _read_request_body(
                        request=_http_request
                    )
                    logging_payload = ManagementEndpointLoggingPayload(
                        route=_route,
                        request_data=_request_body,
                        response=None,
                        start_time=start_time,
                        end_time=end_time,
                        exception=e,
                    )

                    await open_telemetry_logger.async_management_endpoint_failure_hook(
                        logging_payload=logging_payload,
                        parent_otel_span=parent_otel_span,
                    )

            raise e

    return wrapper
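A sketch of how a route picks the wrapper up; the path is illustrative, and the key detail (assumed from the code above) is that the wrapper reads `http_request` and `user_api_key_dict` from keyword arguments, so the endpoint must declare them:

```python
from fastapi import APIRouter, Request

from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.common_utils.management_endpoint_utils import (
    management_endpoint_wrapper,
)

router = APIRouter()


@router.post("/team/example")
@management_endpoint_wrapper
async def example_endpoint(
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = None,  # in the proxy this is Depends(user_api_key_auth)
):
    # success and failure are mirrored to OTEL when a parent span is present
    return {"status": "ok"}
```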
@@ -79,10 +79,6 @@ async def add_litellm_data_to_request(
            data["cache"][k] = v

    verbose_proxy_logger.debug("receiving data: %s", data)
    # users can pass in 'user' param to /chat/completions. Don't override it
    if data.get("user", None) is None and user_api_key_dict.user_id is not None:
        # if users are using user_api_key_auth, set `user` in `data`
        data["user"] = user_api_key_dict.user_id

    if "metadata" not in data:
        data["metadata"] = {}
litellm/proxy/management_helpers/utils.py (new file, 63 lines)
@@ -0,0 +1,63 @@
# What is this?
## Helper utils for the management endpoints (keys/users/teams)

from litellm.proxy._types import LiteLLM_TeamTable, Member, UserAPIKeyAuth
from litellm.proxy.utils import PrismaClient
import uuid
from typing import Optional


async def add_new_member(
    new_member: Member,
    max_budget_in_team: Optional[float],
    prisma_client: PrismaClient,
    team_id: str,
    user_api_key_dict: UserAPIKeyAuth,
    litellm_proxy_admin_name: str,
):
    """
    Add a new member to a team

    - add team id to user table
    - add team member w/ budget to team member table
    """
    ## ADD TEAM ID, to USER TABLE IF NEW ##
    if new_member.user_id is not None:
        await prisma_client.db.litellm_usertable.update(
            where={"user_id": new_member.user_id},
            data={"teams": {"push": [team_id]}},
        )
    elif new_member.user_email is not None:
        user_data = {"user_id": str(uuid.uuid4()), "user_email": new_member.user_email}
        ## user email is not unique acc. to prisma schema -> future improvement
        ### for now: check if it exists in db, if not - insert it
        existing_user_row = await prisma_client.get_data(
            key_val={"user_email": new_member.user_email},
            table_name="user",
            query_type="find_all",
        )
        if existing_user_row is None or (
            isinstance(existing_user_row, list) and len(existing_user_row) == 0
        ):
            await prisma_client.insert_data(data=user_data, table_name="user")

    # Check if trying to set a budget for team member
    if max_budget_in_team is not None and new_member.user_id is not None:
        # create a new budget item for this member
        response = await prisma_client.db.litellm_budgettable.create(
            data={
                "max_budget": max_budget_in_team,
                "created_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
                "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
            }
        )

        _budget_id = response.budget_id
        await prisma_client.db.litellm_teammembership.create(
            data={
                "team_id": team_id,
                "user_id": new_member.user_id,
                "budget_id": _budget_id,
            }
        )
@@ -14,10 +14,9 @@ model_list:
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY
  - model_name: my-triton-model
  - model_name: mistral-embed
    litellm_params:
      model: triton/any"
      api_base: https://exampleopenaiendpoint-production.up.railway.app/triton/embeddings
      model: mistral/mistral-embed

general_settings:
  master_key: sk-1234
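With that entry, the proxy exposes the Mistral embedding model under the `mistral-embed` alias. A quick sketch of calling it, assuming the proxy from this config is running locally with the `sk-1234` master key and `MISTRAL_API_KEY` set:

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.embeddings.create(model="mistral-embed", input=["hello world"])
print(len(response.data[0].embedding))
```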
@@ -90,6 +90,7 @@ from litellm.types.llms.openai import (
    HttpxBinaryResponseContent,
)
from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
from litellm.proxy.management_helpers.utils import add_new_member
from litellm.proxy.utils import (
    PrismaClient,
    DBClient,

@@ -102,7 +103,6 @@ from litellm.proxy.utils import (
    hash_token,
    html_form,
    missing_keys_html_form,
    _read_request_body,
    _is_valid_team_configs,
    _is_user_proxy_admin,
    _get_user_role,

@@ -114,6 +114,8 @@ from litellm.proxy.utils import (
    _to_ns,
    get_error_message_str,
)
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body

from litellm import (
    CreateBatchRequest,
    RetrieveBatchRequest,

@@ -160,6 +162,10 @@ from litellm.proxy.auth.auth_checks import (
    get_user_object,
    allowed_routes_check,
    get_actual_routes,
    log_to_opentelemetry,
)
from litellm.proxy.common_utils.management_endpoint_utils import (
    management_endpoint_wrapper,
)
from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
from litellm.exceptions import RejectedRequestError
@@ -368,6 +374,11 @@ from typing import Dict

api_key_header = APIKeyHeader(
    name="Authorization", auto_error=False, description="Bearer token"
)
azure_api_key_header = APIKeyHeader(
    name="API-Key",
    auto_error=False,
    description="Some older versions of the openai Python package will send an API-Key header with just the API key",
)
user_api_base = None
user_model = None
user_debug = False
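Both header styles now authenticate against the proxy; a sketch with placeholder keys:

```python
import requests

payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "hi"}],
}
url = "http://0.0.0.0:4000/v1/chat/completions"

# standard bearer auth
requests.post(url, json=payload, headers={"Authorization": "Bearer sk-1234"})

# older openai client versions send the key in an API-Key header; this now works too
requests.post(url, json=payload, headers={"API-Key": "sk-1234"})
```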
@@ -508,18 +519,27 @@ async def check_request_disconnection(request: Request, llm_api_call_task):


async def user_api_key_auth(
    request: Request, api_key: str = fastapi.Security(api_key_header)
    request: Request,
    api_key: str = fastapi.Security(api_key_header),
    azure_api_key_header: str = fastapi.Security(azure_api_key_header),
) -> UserAPIKeyAuth:
    global master_key, prisma_client, llm_model_list, user_custom_auth, custom_db_client, general_settings, proxy_logging_obj
    try:
        if isinstance(api_key, str):
            passed_in_key = api_key
            api_key = _get_bearer_token(api_key=api_key)

        elif isinstance(azure_api_key_header, str):
            api_key = azure_api_key_header

        parent_otel_span: Optional[Span] = None
        if open_telemetry_logger is not None:
            parent_otel_span = open_telemetry_logger.tracer.start_span(
                name="Received Proxy Server Request",
                start_time=_to_ns(datetime.now()),
                context=open_telemetry_logger.get_traceparent_from_header(
                    headers=request.headers
                ),
            )
        ### USER-DEFINED AUTH FUNCTION ###
        if user_custom_auth is not None:
@@ -1062,8 +1082,9 @@ async def user_api_key_auth(

                _user_id = _user.get("user_id", None)
                if user_current_spend > user_max_budget:
                    raise Exception(
                        f"ExceededBudget: User {_user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}"
                    raise litellm.BudgetExceededError(
                        current_cost=user_current_spend,
                        max_budget=user_max_budget,
                    )
        else:
            # Token exists, not expired now check if its in budget for the user
@@ -1094,9 +1115,11 @@ async def user_api_key_auth(
                    )

                    if user_current_spend > user_max_budget:
                        raise Exception(
                            f"ExceededBudget: User {valid_token.user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}"
                        raise litellm.BudgetExceededError(
                            current_cost=user_current_spend,
                            max_budget=user_max_budget,
                        )

        # Check 3. Check if user is in their team budget
        if valid_token.team_member_spend is not None:
            if prisma_client is not None:

@@ -1130,8 +1153,9 @@ async def user_api_key_auth(
                )
                if team_member_budget is not None and team_member_budget > 0:
                    if valid_token.team_member_spend > team_member_budget:
                        raise Exception(
                            f"ExceededBudget: Crossed spend within team. UserID: {valid_token.user_id}, in team {valid_token.team_id} has exceeded their budget. Current spend: {valid_token.team_member_spend}; Max Budget: {team_member_budget}"
                        raise litellm.BudgetExceededError(
                            current_cost=valid_token.team_member_spend,
                            max_budget=team_member_budget,
                        )

        # Check 3. If token is expired
@@ -1189,8 +1213,9 @@ async def user_api_key_auth(
            ####################################

            if valid_token.spend >= valid_token.max_budget:
                raise Exception(
                    f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}"
                raise litellm.BudgetExceededError(
                    current_cost=valid_token.spend,
                    max_budget=valid_token.max_budget,
                )

        # Check 5. Token Model Spend is under Model budget

@@ -1226,8 +1251,9 @@ async def user_api_key_auth(
            ):
                current_model_spend = model_spend[0]["_sum"]["spend"]
                current_model_budget = max_budget_per_model[current_model]
                raise Exception(
                    f"ExceededModelBudget: Current spend for model: {current_model_spend}; Max Budget for Model: {current_model_budget}"
                raise litellm.BudgetExceededError(
                    current_cost=current_model_spend,
                    max_budget=current_model_budget,
                )

        # Check 6. Team spend is under Team budget

@@ -1251,8 +1277,9 @@ async def user_api_key_auth(
            )

            if valid_token.team_spend >= valid_token.team_max_budget:
                raise Exception(
                    f"ExceededTokenBudget: Current Team Spend: {valid_token.team_spend}; Max Budget for Team: {valid_token.team_max_budget}"
                raise litellm.BudgetExceededError(
                    current_cost=valid_token.team_spend,
                    max_budget=valid_token.team_max_budget,
                )

        # Check 8: Additional Common Checks across jwt + key auth
@@ -1495,7 +1522,7 @@ async def user_api_key_auth(
            )
        if valid_token is None:
            # No token was found when looking up in the DB
            raise Exception("Invalid token passed")
            raise Exception("Invalid proxy server token passed")
        if valid_token_dict is not None:
            if user_id_information is not None and _is_user_proxy_admin(
                user_id_information

@@ -1528,6 +1555,14 @@ async def user_api_key_auth(
                str(e)
            )
        )

        # Log this exception to OTEL
        if open_telemetry_logger is not None:
            await open_telemetry_logger.async_post_call_failure_hook(
                original_exception=e,
                user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span),
            )

        verbose_proxy_logger.debug(traceback.format_exc())
        if isinstance(e, litellm.BudgetExceededError):
            raise ProxyException(
@@ -7803,6 +7838,10 @@ async def get_global_spend_report(
        default=None,
        description="Time till which to view spend",
    ),
    group_by: Optional[Literal["team", "customer"]] = fastapi.Query(
        default="team",
        description="Group spend by internal team or customer",
    ),
):
    """
    Get Daily Spend per Team, based on specific startTime and endTime. Per team, view usage by each key, model
@@ -7849,69 +7888,130 @@ async def get_global_spend_report(
            f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
        )

    # The team query below previously ran unconditionally; this change wraps it
    # in `if group_by == "team":` and adds a second branch for `group_by == "customer"`.
    if group_by == "team":
        # first get data from spend logs -> SpendByModelApiKey
        # then read data from "SpendByModelApiKey" to format the response obj
        sql_query = """
        WITH SpendByModelApiKey AS (
            SELECT
                date_trunc('day', sl."startTime") AS group_by_day,
                COALESCE(tt.team_alias, 'Unassigned Team') AS team_name,
                sl.model,
                sl.api_key,
                SUM(sl.spend) AS model_api_spend,
                SUM(sl.total_tokens) AS model_api_tokens
            FROM
                "LiteLLM_SpendLogs" sl
            LEFT JOIN
                "LiteLLM_TeamTable" tt
            ON
                sl.team_id = tt.team_id
            WHERE
                sl."startTime" BETWEEN $1::date AND $2::date
            GROUP BY
                date_trunc('day', sl."startTime"),
                tt.team_alias,
                sl.model,
                sl.api_key
        )
        SELECT
            group_by_day,
            jsonb_agg(jsonb_build_object(
                'team_name', team_name,
                'total_spend', total_spend,
                'metadata', metadata
            )) AS teams
        FROM (
            SELECT
                group_by_day,
                team_name,
                SUM(model_api_spend) AS total_spend,
                jsonb_agg(jsonb_build_object(
                    'model', model,
                    'api_key', api_key,
                    'spend', model_api_spend,
                    'total_tokens', model_api_tokens
                )) AS metadata
            FROM
                SpendByModelApiKey
            GROUP BY
                group_by_day,
                team_name
        ) AS aggregated
        GROUP BY
            group_by_day
        ORDER BY
            group_by_day;
        """

        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj
        )
        if db_response is None:
            return []

        return db_response

    elif group_by == "customer":
        sql_query = """
        WITH SpendByModelApiKey AS (
            SELECT
                date_trunc('day', sl."startTime") AS group_by_day,
                sl.end_user AS customer,
                sl.model,
                sl.api_key,
                SUM(sl.spend) AS model_api_spend,
                SUM(sl.total_tokens) AS model_api_tokens
            FROM
                "LiteLLM_SpendLogs" sl
            WHERE
                sl."startTime" BETWEEN $1::date AND $2::date
            GROUP BY
                date_trunc('day', sl."startTime"),
                customer,
                sl.model,
                sl.api_key
        )
        SELECT
            group_by_day,
            jsonb_agg(jsonb_build_object(
                'customer', customer,
                'total_spend', total_spend,
                'metadata', metadata
            )) AS customers
        FROM
            (
                SELECT
                    group_by_day,
                    customer,
                    SUM(model_api_spend) AS total_spend,
                    jsonb_agg(jsonb_build_object(
                        'model', model,
                        'api_key', api_key,
                        'spend', model_api_spend,
                        'total_tokens', model_api_tokens
                    )) AS metadata
                FROM
                    SpendByModelApiKey
                GROUP BY
                    group_by_day,
                    customer
            ) AS aggregated
        GROUP BY
            group_by_day
        ORDER BY
            group_by_day;
        """

        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj
        )
        if db_response is None:
            return []

        return db_response

    except Exception as e:
        raise HTTPException(
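A sketch of exercising the new grouping from a client; the `/global/spend/report` path and `sk-1234` key are assumptions based on the rest of this proxy config, and the dates are placeholders:

```python
import requests

resp = requests.get(
    "http://0.0.0.0:4000/global/spend/report",
    params={
        "start_date": "2024-06-01",
        "end_date": "2024-06-30",
        "group_by": "customer",  # defaults to "team"
    },
    headers={"Authorization": "Bearer sk-1234"},
)
print(resp.json())
```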
@@ -8097,7 +8197,9 @@ async def _get_spend_report_for_time_range(

        return response, spend_per_tag
    except Exception as e:
        verbose_proxy_logger.error("Exception in _get_daily_spend_reports", e)  # noqa
        verbose_proxy_logger.error(
            "Exception in _get_daily_spend_reports {}".format(str(e))
        )  # noqa


@router.post(
@@ -8755,7 +8857,7 @@ async def new_user(data: NewUserRequest):
    - organization_id: Optional[str] - specify the org a user belongs to.
    - user_email: Optional[str] - Specify a user email.
    - send_invite_email: Optional[bool] - Specify if an invite email should be sent.
    - user_role: Optional[str] - Specify a user role - "admin", "app_owner", "app_user"
    - user_role: Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/litellm/proxy/_types.py#L20`
    - max_budget: Optional[float] - Specify max budget for a given user.
    - models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
    - tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute)

@@ -8790,7 +8892,10 @@ async def new_user(data: NewUserRequest):
                role="user",
                user_email=data_json.get("user_email", None),
            ),
        )
            ),
            http_request=Request(
                scope={"type": "http"},
            ),
        )

    if data.send_invite_email is True:
@@ -9823,8 +9928,10 @@ async def delete_end_user(
    dependencies=[Depends(user_api_key_auth)],
    response_model=LiteLLM_TeamTable,
)
@management_endpoint_wrapper
async def new_team(
    data: NewTeamRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    litellm_changed_by: Optional[str] = Header(
        None,
@@ -10058,6 +10165,7 @@ async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
@router.post(
    "/team/update", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def update_team(
    data: UpdateTeamRequest,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
@@ -10163,8 +10271,10 @@ async def update_team(
    tags=["team management"],
    dependencies=[Depends(user_api_key_auth)],
)
@management_endpoint_wrapper
async def team_member_add(
    data: TeamMemberAddRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
@@ -10190,10 +10300,12 @@ async def team_member_add(
        raise HTTPException(status_code=400, detail={"error": "No team id passed in"})

    if data.member is None:
        raise HTTPException(status_code=400, detail={"error": "No member passed in"})
        raise HTTPException(
            status_code=400, detail={"error": "No member/members passed in"}
        )

    existing_team_row = await prisma_client.get_data(  # type: ignore
        team_id=data.team_id, table_name="team", query_type="find_unique"
    existing_team_row = await prisma_client.db.litellm_teamtable.find_unique(
        where={"team_id": data.team_id}
    )
    if existing_team_row is None:
        raise HTTPException(
@@ -10203,75 +10315,50 @@ async def team_member_add(
        },
    )

    # removed implementation:
    new_member = data.member

    existing_team_row.members_with_roles.append(new_member)

    complete_team_data = LiteLLM_TeamTable(
        **_get_pydantic_json_dict(existing_team_row),
    )

    team_row = await prisma_client.update_data(
        update_key_values=complete_team_data.json(exclude_none=True),
        data=complete_team_data.json(exclude_none=True),
        table_name="team",
        team_id=data.team_id,
    )

    ## ADD USER, IF NEW ##
    user_data = {  # type: ignore
        "teams": [team_row["team_id"]],
        "models": team_row["data"].models,
    }
    if new_member.user_id is not None:
        user_data["user_id"] = new_member.user_id  # type: ignore
        await prisma_client.update_data(
            user_id=new_member.user_id,
            data=user_data,
            update_key_values_custom_query={
                "teams": {
                    "push": [team_row["team_id"]],
                }
            },
            table_name="user",
        )
    elif new_member.user_email is not None:
        user_data["user_id"] = str(uuid.uuid4())
        user_data["user_email"] = new_member.user_email
        ## user email is not unique acc. to prisma schema -> future improvement
        ### for now: check if it exists in db, if not - insert it
        existing_user_row = await prisma_client.get_data(
            key_val={"user_email": new_member.user_email},
            table_name="user",
            query_type="find_all",
        )
        if existing_user_row is None or (
            isinstance(existing_user_row, list) and len(existing_user_row) == 0
        ):
            await prisma_client.insert_data(data=user_data, table_name="user")

    # Check if trying to set a budget for team member
    if data.max_budget_in_team is not None and new_member.user_id is not None:
        # create a new budget item for this member
        response = await prisma_client.db.litellm_budgettable.create(
            data={
                "max_budget": data.max_budget_in_team,
                "created_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
                "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
            }
        )

        _budget_id = response.budget_id
        await prisma_client.db.litellm_teammembership.create(
            data={
                "team_id": data.team_id,
                "user_id": new_member.user_id,
                "budget_id": _budget_id,
            }
        )

    return team_row

    # replacement implementation:
    complete_team_data = LiteLLM_TeamTable(**existing_team_row.model_dump())

    if isinstance(data.member, Member):
        # add to team db
        new_member = data.member

        complete_team_data.members_with_roles.append(new_member)

    elif isinstance(data.member, List):
        # add to team db
        new_members = data.member

        complete_team_data.members_with_roles.extend(new_members)

    # ADD MEMBER TO TEAM
    _db_team_members = [m.model_dump() for m in complete_team_data.members_with_roles]
    updated_team = await prisma_client.db.litellm_teamtable.update(
        where={"team_id": data.team_id},
        data={"members_with_roles": json.dumps(_db_team_members)},  # type: ignore
    )

    if isinstance(data.member, Member):
        await add_new_member(
            new_member=data.member,
            max_budget_in_team=data.max_budget_in_team,
            prisma_client=prisma_client,
            user_api_key_dict=user_api_key_dict,
            litellm_proxy_admin_name=litellm_proxy_admin_name,
            team_id=data.team_id,
        )
    elif isinstance(data.member, List):
        tasks: List = []
        for m in data.member:
            await add_new_member(
                new_member=m,
                max_budget_in_team=data.max_budget_in_team,
                prisma_client=prisma_client,
                user_api_key_dict=user_api_key_dict,
                litellm_proxy_admin_name=litellm_proxy_admin_name,
                team_id=data.team_id,
            )
        await asyncio.gather(*tasks)

    return updated_team


@router.post(
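A request-level sketch of the widened endpoint; the `/team/member_add` path is assumed from how the route is registered elsewhere in the proxy, and the ids and emails are placeholders:

```python
import requests

requests.post(
    "http://0.0.0.0:4000/team/member_add",
    headers={"Authorization": "Bearer sk-1234"},
    json={
        "team_id": "team-1",
        # a single member object still works; a list of members is now accepted too
        "member": [
            {"role": "user", "user_id": "u-123"},
            {"role": "user", "user_email": "dev@example.com"},
        ],
        "max_budget_in_team": 25.0,
    },
)
```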
@@ -10279,8 +10366,10 @@ async def team_member_add(
    tags=["team management"],
    dependencies=[Depends(user_api_key_auth)],
)
@management_endpoint_wrapper
async def team_member_delete(
    data: TeamMemberDeleteRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
@@ -10384,8 +10473,10 @@ async def team_member_delete(
@router.post(
    "/team/delete", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def delete_team(
    data: DeleteTeamRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
    litellm_changed_by: Optional[str] = Header(
        None,
@@ -10469,10 +10560,12 @@ async def delete_team(
@router.get(
    "/team/info", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def team_info(
    http_request: Request,
    team_id: str = fastapi.Query(
        default=None, description="Team ID in the request parameters"
    )
    ),
):
    """
    get info on team + related keys
@@ -10556,8 +10649,10 @@ async def team_info(
@router.post(
    "/team/block", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def block_team(
    data: BlockTeamRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
@@ -10578,8 +10673,10 @@ async def block_team(
@router.post(
    "/team/unblock", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def unblock_team(
    data: BlockTeamRequest,
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
@@ -10600,7 +10697,9 @@ async def unblock_team(
@router.get(
    "/team/list", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@management_endpoint_wrapper
async def list_team(
    http_request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
@@ -13007,7 +13106,9 @@ async def auth_callback(request: Request):
    user_role = getattr(result, generic_user_role_attribute_name, None)

    if user_id is None:
        user_id = getattr(result, "first_name", "") + getattr(result, "last_name", "")
        _first_name = getattr(result, "first_name", "") or ""
        _last_name = getattr(result, "last_name", "") or ""
        user_id = _first_name + _last_name

    user_info = None
    user_id_models: List = []
@@ -91,7 +91,7 @@ model LiteLLM_TeamTable {
  updated_at DateTime @default(now()) @updatedAt @map("updated_at")
  model_spend Json @default("{}")
  model_max_budget Json @default("{}")
  model_id Int? @unique
  model_id Int? @unique // id for LiteLLM_ModelTable -> stores team-level model aliases
  litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
  litellm_model_table LiteLLM_ModelTable? @relation(fields: [model_id], references: [id])
}
litellm/proxy/tests/test_openai_request_with_traceparent.py (new file, 41 lines)
@@ -0,0 +1,41 @@
# mypy: ignore-errors
import openai
from opentelemetry import trace
from opentelemetry.context import Context
from opentelemetry.trace import SpanKind
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator


trace.set_tracer_provider(TracerProvider())
memory_exporter = InMemorySpanExporter()
span_processor = SimpleSpanProcessor(memory_exporter)
trace.get_tracer_provider().add_span_processor(span_processor)
tracer = trace.get_tracer(__name__)

# create an otel traceparent header
tracer = trace.get_tracer(__name__)
with tracer.start_as_current_span("ishaan-local-dev-app") as span:
    span.set_attribute("generation_name", "ishaan-generation-openai-client")
    client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
    extra_headers = {}
    context = trace.set_span_in_context(span)
    traceparent = TraceContextTextMapPropagator()
    traceparent.inject(carrier=extra_headers, context=context)
    print("EXTRA HEADERS: ", extra_headers)
    _trace_parent = extra_headers.get("traceparent")
    trace_id = _trace_parent.split("-")[1]
    print("Trace ID: ", trace_id)

    # # request sent to model set on litellm proxy, `litellm --model`
    response = client.chat.completions.create(
        model="llama3",
        messages=[
            {"role": "user", "content": "this is a test request, write a short poem"}
        ],
        extra_headers=extra_headers,
    )

    print(response)
litellm/proxy/tests/test_simple_traceparent_openai.py (new file, 21 lines)
@@ -0,0 +1,21 @@
# mypy: ignore-errors
import openai
import uuid

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
example_traceparent = f"00-80e1afed08e019fc1110464cfa66635c-02e80198930058d4-01"
extra_headers = {"traceparent": example_traceparent}
_trace_id = example_traceparent.split("-")[1]

print("EXTRA HEADERS: ", extra_headers)
print("Trace ID: ", _trace_id)

response = client.chat.completions.create(
    model="llama3",
    messages=[
        {"role": "user", "content": "this is a test request, write a short poem"}
    ],
    extra_headers=extra_headers,
)

print(response)
@@ -48,6 +48,7 @@ from datetime import datetime, timedelta
from litellm.integrations.slack_alerting import SlackAlerting
from typing_extensions import overload
from functools import wraps
from fastapi import Request

if TYPE_CHECKING:
    from opentelemetry.trace import Span as _Span
@@ -2017,6 +2018,7 @@ def get_logging_payload(
        user_api_key_team_id=None,
        user_api_key_user_id=None,
        user_api_key_team_alias=None,
        spend_logs_metadata=None,
    )
    if isinstance(metadata, dict):
        verbose_proxy_logger.debug(
@@ -2595,36 +2597,6 @@ async def update_spend(
        raise e


# Removed here: _read_request_body now lives in
# litellm/proxy/common_utils/http_parsing_utils.py (see the new file above).
async def _read_request_body(request):
    """
    Asynchronous function to read the request body and parse it as JSON or literal data.

    Parameters:
    - request: The request object to read the body from

    Returns:
    - dict: Parsed request data as a dictionary
    """
    import ast, json

    try:
        request_data = {}
        if request is None:
            return request_data
        body = await request.body()

        if body == b"" or body is None:
            return request_data
        body_str = body.decode()
        try:
            request_data = ast.literal_eval(body_str)
        except:
            request_data = json.loads(body_str)
        return request_data
    except:
        return {}


def _is_projected_spend_over_limit(
    current_spend: float, soft_budget_limit: Optional[float]
):
@@ -2057,11 +2057,14 @@ class Router:
            generic_fallback_idx: Optional[int] = None
            ## check for specific model group-specific fallbacks
            for idx, item in enumerate(fallbacks):
                if list(item.keys())[0] == model_group:
                    fallback_model_group = item[model_group]
                    break
                elif list(item.keys())[0] == "*":
                    generic_fallback_idx = idx
                if isinstance(item, dict):
                    if list(item.keys())[0] == model_group:
                        fallback_model_group = item[model_group]
                        break
                    elif list(item.keys())[0] == "*":
                        generic_fallback_idx = idx
                elif isinstance(item, str):
                    fallback_model_group = [fallbacks.pop(idx)]
            ## if none, check for generic fallback
            if (
                fallback_model_group is None

@@ -2310,13 +2313,15 @@ class Router:
            verbose_router_logger.debug(f"inside model fallbacks: {fallbacks}")
            fallback_model_group = None
            generic_fallback_idx: Optional[int] = None
            ## check for specific model group-specific fallbacks
            for idx, item in enumerate(fallbacks):
                if list(item.keys())[0] == model_group:
                    fallback_model_group = item[model_group]
                    break
                elif list(item.keys())[0] == "*":
                    generic_fallback_idx = idx
                if isinstance(item, dict):
                    if list(item.keys())[0] == model_group:
                        fallback_model_group = item[model_group]
                        break
                    elif list(item.keys())[0] == "*":
                        generic_fallback_idx = idx
                elif isinstance(item, str):
                    fallback_model_group = [fallbacks.pop(idx)]
            ## if none, check for generic fallback
            if (
                fallback_model_group is None
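A configuration sketch of what the reworked loop now tolerates (deployment params are placeholders):

```python
from litellm import Router

router = Router(
    model_list=[
        {"model_name": "gpt-4", "litellm_params": {"model": "gpt-4"}},
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}},
    ],
    fallbacks=[
        {"gpt-4": ["gpt-3.5-turbo"]},  # group-specific fallback (dict entry)
        {"*": ["gpt-3.5-turbo"]},      # generic wildcard fallback
        # plain string entries no longer crash the lookup loop; per the code above
        # they are popped and used as a fallback group themselves
        "gpt-3.5-turbo",
    ],
)
```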
@@ -810,6 +810,28 @@ def test_vertexai_embedding():
        pytest.fail(f"Error occurred: {e}")


def test_vertexai_embedding_embedding_latest():
    try:
        load_vertex_ai_credentials()
        litellm.set_verbose = True

        response = embedding(
            model="vertex_ai/text-embedding-004",
            input=["hi"],
            dimensions=1,
            auto_truncate=True,
            task_type="RETRIEVAL_QUERY",
        )

        assert len(response.data[0]["embedding"]) == 1
        assert response.usage.prompt_tokens > 0
        print("response:", response)
    except litellm.RateLimitError as e:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


@pytest.mark.asyncio
async def test_vertexai_aembedding():
    try:
@@ -220,13 +220,13 @@ def test_completion_bedrock_claude_sts_oidc_auth():
    aws_web_identity_token = "oidc/circleci_v2/"
    aws_region_name = os.environ["AWS_REGION_NAME"]
    # aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
    # TODO: This is using David's IAM role, we should use Litellm's IAM role eventually
    # TODO: This is using ai.moda's IAM role, we should use LiteLLM's IAM role eventually
    aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci"

    try:
        litellm.set_verbose = True

        response = completion(
        response_1 = completion(
            model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
            messages=messages,
            max_tokens=10,

@@ -236,8 +236,40 @@ def test_completion_bedrock_claude_sts_oidc_auth():
            aws_role_name=aws_role_name,
            aws_session_name="my-test-session",
        )
        # Add any assertions here to check the response
        print(response)
        print(response_1)
        assert len(response_1.choices) > 0
        assert len(response_1.choices[0].message.content) > 0

        # This second call is to verify that the cache isn't breaking anything
        response_2 = completion(
            model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
            messages=messages,
            max_tokens=5,
            temperature=0.2,
            aws_region_name=aws_region_name,
            aws_web_identity_token=aws_web_identity_token,
            aws_role_name=aws_role_name,
            aws_session_name="my-test-session",
        )
        print(response_2)
        assert len(response_2.choices) > 0
        assert len(response_2.choices[0].message.content) > 0

        # This third call is to verify that the cache isn't used for a different region
        response_3 = completion(
            model="bedrock/anthropic.claude-3-haiku-20240307-v1:0",
            messages=messages,
            max_tokens=6,
            temperature=0.3,
            aws_region_name="us-east-1",
            aws_web_identity_token=aws_web_identity_token,
            aws_role_name=aws_role_name,
            aws_session_name="my-test-session",
        )
        print(response_3)
        assert len(response_3.choices) > 0
        assert len(response_3.choices[0].message.content) > 0

    except RateLimitError:
        pass
    except Exception as e:
@@ -255,7 +287,7 @@ def test_completion_bedrock_httpx_command_r_sts_oidc_auth():
    aws_web_identity_token = "oidc/circleci_v2/"
    aws_region_name = os.environ["AWS_REGION_NAME"]
    # aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
    # TODO: This is using David's IAM role, we should use Litellm's IAM role eventually
    # TODO: This is using ai.moda's IAM role, we should use LiteLLM's IAM role eventually
    aws_role_name = "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci"

    try:
@@ -16,7 +16,7 @@ from litellm.llms.prompt_templates.factory import anthropic_messages_pt
from unittest.mock import patch, MagicMock
from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler

# litellm.num_retries=3
# litellm.num_retries = 3
litellm.cache = None
litellm.success_callback = []
user_message = "Write a short poem about the sky"

@@ -114,6 +114,27 @@ def test_null_role_response():
    assert response.choices[0].message.role == "assistant"


def test_completion_azure_ai_command_r():
    try:
        import os

        litellm.set_verbose = True

        os.environ["AZURE_AI_API_BASE"] = os.getenv("AZURE_COHERE_API_BASE", "")
        os.environ["AZURE_AI_API_KEY"] = os.getenv("AZURE_COHERE_API_KEY", "")

        response: litellm.ModelResponse = completion(
            model="azure_ai/command-r-plus",
            messages=[{"role": "user", "content": "What is the meaning of life?"}],
        )  # type: ignore

        assert "azure_ai" in response.model
    except litellm.Timeout as e:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_azure_command_r():
    try:
        litellm.set_verbose = True

@@ -530,6 +551,7 @@ def test_completion_cohere_command_r_plus_function_call():
            messages=messages,
            tools=tools,
            tool_choice="auto",
            force_single_step=True,
        )
        print(second_response)
    except Exception as e:
@@ -720,7 +742,11 @@ def test_completion_claude_3_function_plus_image():
    print(response)


def test_completion_azure_mistral_large_function_calling():
@pytest.mark.parametrize(
    "provider",
    ["azure", "azure_ai"],
)
def test_completion_azure_mistral_large_function_calling(provider):
    """
    This primarily tests if the 'Function()' pydantic object correctly handles argument param passed in as a dict vs. string
    """

@@ -751,8 +777,9 @@ def test_completion_azure_mistral_large_function_calling():
            "content": "What's the weather like in Boston today in Fahrenheit?",
        }
    ]

    response = completion(
        model="azure/mistral-large-latest",
        model="{}/mistral-large-latest".format(provider),
        api_base=os.getenv("AZURE_MISTRAL_API_BASE"),
        api_key=os.getenv("AZURE_MISTRAL_API_KEY"),
        messages=messages,
@@ -34,14 +34,15 @@ class MyCustomHandler(CustomLogger):
        self.response_cost = 0

    def log_pre_api_call(self, model, messages, kwargs):
        print(f"Pre-API Call")
        print("Pre-API Call")
        traceback.print_stack()
        self.data_sent_to_api = kwargs["additional_args"].get("complete_input_dict", {})

    def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
        print(f"Post-API Call")
        print("Post-API Call")

    def log_stream_event(self, kwargs, response_obj, start_time, end_time):
        print(f"On Stream")
        print("On Stream")

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        print(f"On Success")

@@ -372,6 +373,7 @@ async def test_async_custom_handler_embedding_optional_param():
    Tests if the openai optional params for embedding - user + encoding_format,
    are logged
    """
    litellm.set_verbose = True
    customHandler_optional_params = MyCustomHandler()
    litellm.callbacks = [customHandler_optional_params]
    response = await litellm.aembedding(
@@ -55,8 +55,12 @@ async def test_content_policy_exception_azure():
    except litellm.ContentPolicyViolationError as e:
        print("caught a content policy violation error! Passed")
        print("exception", e)
        assert e.litellm_debug_info is not None
        assert isinstance(e.litellm_debug_info, str)
        assert len(e.litellm_debug_info) > 0
        pass
    except Exception as e:
        print()
        pytest.fail(f"An exception occurred - {str(e)}")
@@ -195,6 +195,8 @@ async def test_aimage_generation_vertex_ai():
        assert isinstance(d, litellm.ImageObject)
        print("data in response.data", d)
        assert d.b64_json is not None
    except litellm.ServiceUnavailableError as e:
        pass
    except litellm.RateLimitError as e:
        pass
    except litellm.ContentPolicyViolationError:
@@ -16,6 +16,7 @@ from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLMRoutes
from litellm.proxy.auth.handle_jwt import JWTHandler
from litellm.caching import DualCache
from datetime import datetime, timedelta
from fastapi import Request

public_key = {
    "kty": "RSA",

@@ -346,6 +347,7 @@ async def test_team_token_output(prisma_client, audience):
                models=["gpt-3.5-turbo", "gpt-4"],
            ),
            user_api_key_dict=result,
            http_request=Request(scope={"type": "http"}),
        )
    except Exception as e:
        pytest.fail(f"This should not fail - {str(e)}")

@@ -534,6 +536,7 @@ async def test_user_token_output(
                models=["gpt-3.5-turbo", "gpt-4"],
            ),
            user_api_key_dict=result,
            http_request=Request(scope={"type": "http"}),
        )
        if default_team_id:
            await new_team(

@@ -544,6 +547,7 @@ async def test_user_token_output(
                models=["gpt-3.5-turbo", "gpt-4"],
            ),
            user_api_key_dict=result,
            http_request=Request(scope={"type": "http"}),
        )
    except Exception as e:
        pytest.fail(f"This should not fail - {str(e)}")
@@ -137,6 +137,7 @@ async def test_new_user_response(prisma_client):
             NewTeamRequest(
                 team_id=_team_id,
             ),
+            http_request=Request(scope={"type": "http"}),
             user_api_key_dict=UserAPIKeyAuth(
                 user_role=LitellmUserRoles.PROXY_ADMIN,
                 api_key="sk-1234",
@@ -272,7 +273,7 @@ def test_call_with_invalid_key(prisma_client):
     except Exception as e:
         print("Got Exception", e)
         print(e.message)
-        assert "Authentication Error, Invalid token passed" in e.message
+        assert "Authentication Error, Invalid proxy server token passed" in e.message
         pass
 
 
@@ -368,6 +369,7 @@ async def test_call_with_valid_model_using_all_models(prisma_client):
         new_team_response = await new_team(
             data=team_request,
             user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
+            http_request=Request(scope={"type": "http"}),
         )
         print("new_team_response", new_team_response)
         created_team_id = new_team_response["team_id"]
@@ -471,7 +473,7 @@ def test_call_with_user_over_budget(prisma_client):
         asyncio.run(test())
     except Exception as e:
         error_detail = e.message
-        assert "Authentication Error, ExceededBudget:" in error_detail
+        assert "Budget has been exceeded" in error_detail
         print(vars(e))
 
 
@@ -652,7 +654,7 @@ def test_call_with_proxy_over_budget(prisma_client):
             error_detail = e.message
         else:
            error_detail = traceback.format_exc()
-        assert "Authentication Error, ExceededBudget:" in error_detail
+        assert "Budget has been exceeded" in error_detail
         print(vars(e))
 
 
@@ -730,7 +732,7 @@ def test_call_with_user_over_budget_stream(prisma_client):
         asyncio.run(test())
     except Exception as e:
         error_detail = e.message
-        assert "Authentication Error, ExceededBudget:" in error_detail
+        assert "Budget has been exceeded" in error_detail
         print(vars(e))
 
 
@@ -827,7 +829,7 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
         asyncio.run(test())
     except Exception as e:
         error_detail = e.message
-        assert "Authentication Error, ExceededBudget:" in error_detail
+        assert "Budget has been exceeded" in error_detail
         print(vars(e))
 
 
@@ -1086,6 +1088,7 @@ def test_generate_and_update_key(prisma_client):
                 api_key="sk-1234",
                 user_id="1234",
             ),
+            http_request=Request(scope={"type": "http"}),
         )
 
         _team_2 = "ishaan-special-team_{}".format(uuid.uuid4())
@@ -1098,6 +1101,7 @@ def test_generate_and_update_key(prisma_client):
                 api_key="sk-1234",
                 user_id="1234",
             ),
+            http_request=Request(scope={"type": "http"}),
         )
 
         request = NewUserRequest(
@@ -1175,7 +1179,6 @@ def test_generate_and_update_key(prisma_client):
         asyncio.run(test())
     except Exception as e:
         print("Got Exception", e)
         print(e.message)
         pytest.fail(f"An exception occurred - {str(e)}")
@@ -1363,7 +1366,7 @@ def test_call_with_key_over_budget(prisma_client):
             error_detail = e.message
         else:
             error_detail = str(e)
-        assert "Authentication Error, ExceededTokenBudget:" in error_detail
+        assert "Budget has been exceeded" in error_detail
         print(vars(e))
 
 
@@ -1477,7 +1480,7 @@ def test_call_with_key_over_model_budget(prisma_client):
         # print(f"Error - {str(e)}")
         traceback.print_exc()
         error_detail = e.message
-        assert "Authentication Error, ExceededModelBudget:" in error_detail
+        assert "Budget has been exceeded!" in error_detail
         print(vars(e))
 
 
@@ -1638,7 +1641,7 @@ async def test_call_with_key_over_budget_stream(prisma_client):
     except Exception as e:
         print("Got Exception", e)
         error_detail = e.message
-        assert "Authentication Error, ExceededTokenBudget:" in error_detail
+        assert "Budget has been exceeded" in error_detail
         print(vars(e))
 
 
@@ -2051,6 +2054,7 @@ async def test_master_key_hashing(prisma_client):
                 api_key="sk-1234",
                 user_id="1234",
             ),
+            http_request=Request(scope={"type": "http"}),
         )
 
         _response = await new_user(
@@ -2184,6 +2188,7 @@ async def test_create_update_team(prisma_client):
             tpm_limit=20,
             rpm_limit=20,
         ),
+        http_request=Request(scope={"type": "http"}),
         user_api_key_dict=UserAPIKeyAuth(
             user_role=LitellmUserRoles.PROXY_ADMIN,
             api_key="sk-1234",
@@ -2233,7 +2238,10 @@ async def test_create_update_team(prisma_client):
     )
 
     # now hit team_info
-    response = await team_info(team_id=_team_id)
+    response = await team_info(
+        team_id=_team_id,
+        http_request=Request(scope={"type": "http"}),
+    )
 
     print("RESPONSE from team_info", response)
@@ -1059,3 +1059,53 @@ async def test_default_model_fallbacks(sync_mode, litellm_module_fallbacks):
 
     assert isinstance(response, litellm.ModelResponse)
     assert response.model is not None and response.model == "gpt-4o"
+
+
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_client_side_fallbacks_list(sync_mode):
+    """
+    Tests Client Side Fallbacks
+
+    User can pass "fallbacks": ["gpt-3.5-turbo"] and this should work
+    """
+    router = Router(
+        model_list=[
+            {
+                "model_name": "bad-model",
+                "litellm_params": {
+                    "model": "openai/my-bad-model",
+                    "api_key": "my-bad-api-key",
+                },
+            },
+            {
+                "model_name": "my-good-model",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_key": os.getenv("OPENAI_API_KEY"),
+                },
+            },
+        ],
+    )
+
+    if sync_mode:
+        response = router.completion(
+            model="bad-model",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+            fallbacks=["my-good-model"],
+            mock_testing_fallbacks=True,
+            mock_response="Hey! nice day",
+        )
+    else:
+        response = await router.acompletion(
+            model="bad-model",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+            fallbacks=["my-good-model"],
+            mock_testing_fallbacks=True,
+            mock_response="Hey! nice day",
+        )
+
+    assert isinstance(response, litellm.ModelResponse)
+    assert response.model is not None and response.model == "gpt-4o"
@@ -1463,6 +1463,10 @@ async def test_parallel_streaming_requests(sync_mode, model):
 
     except RateLimitError:
         pass
+    except litellm.InternalServerError as e:
+        if "predibase" in str(e).lower():
+            # only skip internal server error from predibase - their endpoint seems quite unstable
+            pass
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
@@ -2535,7 +2539,10 @@ def streaming_and_function_calling_format_tests(idx, chunk):
     return extracted_chunk, finished
 
 
-def test_openai_streaming_and_function_calling():
+@pytest.mark.parametrize(
+    "model", ["gpt-3.5-turbo", "anthropic.claude-3-sonnet-20240229-v1:0"]
+)
+def test_streaming_and_function_calling(model):
     tools = [
         {
             "type": "function",
@@ -2556,16 +2563,21 @@ def test_openai_streaming_and_function_calling():
             },
         }
     ]
 
     messages = [{"role": "user", "content": "What is the weather like in Boston?"}]
     try:
-        response = completion(
-            model="gpt-3.5-turbo",
+        litellm.set_verbose = True
+        response: litellm.CustomStreamWrapper = completion(
+            model=model,
             tools=tools,
             messages=messages,
             stream=True,
-        )
+            tool_choice="required",
+        )  # type: ignore
         # Add any assertions here to check the response
         for idx, chunk in enumerate(response):
             # continue
+            print("\n{}\n".format(chunk))
             if idx == 0:
                 assert (
                     chunk.choices[0].delta.tool_calls[0].function.arguments is not None
@@ -2573,6 +2585,7 @@ def test_openai_streaming_and_function_calling():
                 assert isinstance(
                     chunk.choices[0].delta.tool_calls[0].function.arguments, str
                 )
+        # assert False
     except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
+        raise e
@@ -3990,6 +3990,7 @@ def test_async_text_completion():
     asyncio.run(test_get_response())
 
 
+@pytest.mark.skip(reason="Tgai endpoints are unstable")
 def test_async_text_completion_together_ai():
     litellm.set_verbose = True
     print("test_async_text_completion")
@@ -187,12 +187,43 @@ def test_load_test_token_counter(model):
     print("model={}, total test time={}".format(model, total_time))
     assert total_time < 10, f"Total encoding time > 10s, {total_time}"
 
+
 def test_openai_token_with_image_and_text():
     model = "gpt-4o"
-    full_request = {'model': 'gpt-4o', 'tools': [{'type': 'function', 'function': {'name': 'json', 'parameters': {'type': 'object', 'required': ['clause'], 'properties': {'clause': {'type': 'string'}}}, 'description': 'Respond with a JSON object.'}}], 'logprobs': False, 'messages': [{'role': 'user', 'content': [{'text': '\n Just some long text, long long text, and you know it will be longer than 7 tokens definetly.', 'type': 'text'}]}], 'tool_choice': {'type': 'function', 'function': {'name': 'json'}}, 'exclude_models': [], 'disable_fallback': False, 'exclude_providers': []}
+    full_request = {
+        "model": "gpt-4o",
+        "tools": [
+            {
+                "type": "function",
+                "function": {
+                    "name": "json",
+                    "parameters": {
+                        "type": "object",
+                        "required": ["clause"],
+                        "properties": {"clause": {"type": "string"}},
+                    },
+                    "description": "Respond with a JSON object.",
+                },
+            }
+        ],
+        "logprobs": False,
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "text": "\n Just some long text, long long text, and you know it will be longer than 7 tokens definetly.",
+                        "type": "text",
+                    }
+                ],
+            }
+        ],
+        "tool_choice": {"type": "function", "function": {"name": "json"}},
+        "exclude_models": [],
+        "disable_fallback": False,
+        "exclude_providers": [],
+    }
     messages = full_request.get("messages", [])
 
     token_count = token_counter(model=model, messages=messages)
     print(token_count)
 
 
 test_openai_token_with_image_and_text()
@@ -23,6 +23,7 @@ from litellm.utils import (
     create_pretrained_tokenizer,
     create_tokenizer,
     get_max_tokens,
+    get_supported_openai_params,
 )
 
 # Assuming your trim_messages, shorten_message_to_fit_limit, and get_token_count functions are all in a module named 'message_utils'
@@ -386,3 +387,11 @@ def test_get_max_token_unit_test():
     )  # Returns a number instead of throwing an Exception
 
     assert isinstance(max_tokens, int)
+
+
+def test_get_supported_openai_params() -> None:
+    # Mapped provider
+    assert isinstance(get_supported_openai_params("gpt-4"), list)
+
+    # Unmapped provider
+    assert get_supported_openai_params("nonexistent") is None
@@ -1,5 +1,6 @@
 from typing import TypedDict, Any, Union, Optional, Literal, List
 import json
+from .openai import ChatCompletionToolCallChunk
 from typing_extensions import (
     Self,
     Protocol,
@@ -118,6 +119,15 @@ class ToolBlockDeltaEvent(TypedDict):
     input: str
 
 
+class ToolUseBlockStartEvent(TypedDict):
+    name: str
+    toolUseId: str
+
+
+class ContentBlockStartEvent(TypedDict, total=False):
+    toolUse: Optional[ToolUseBlockStartEvent]
+
+
 class ContentBlockDeltaEvent(TypedDict, total=False):
     """
     Either 'text' or 'toolUse' will be specified for Converse API streaming response.
@@ -138,10 +148,11 @@ class RequestObject(TypedDict, total=False):
 
 class GenericStreamingChunk(TypedDict):
     text: Required[str]
-    tool_str: Required[str]
+    tool_use: Optional[ChatCompletionToolCallChunk]
     is_finished: Required[bool]
     finish_reason: Required[str]
     usage: Optional[ConverseTokenUsageBlock]
+    index: int
 
 
 class Document(TypedDict):
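Reviewer note: the reshaped `GenericStreamingChunk` now carries a structured `tool_use` payload plus an `index`, instead of a raw `tool_str`. A minimal sketch of a chunk a provider handler might emit (all values are made up, and this assumes the TypedDicts above live in `litellm.types.llms.bedrock`):

```python
from litellm.types.llms.bedrock import GenericStreamingChunk

# Hypothetical chunk: a streamed tool-call delta with no text content yet.
chunk: GenericStreamingChunk = {
    "text": "",
    "tool_use": {
        "id": "tooluse_123",  # made-up id
        "type": "function",
        "function": {"name": "get_weather", "arguments": '{"city": "Boston"}'},
    },
    "is_finished": False,
    "finish_reason": "",
    "usage": None,
    "index": 0,
}
```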
@@ -296,14 +296,27 @@ class ListBatchRequest(TypedDict, total=False):
 
 
 class ChatCompletionToolCallFunctionChunk(TypedDict):
-    name: str
+    name: Optional[str]
     arguments: str
 
 
 class ChatCompletionToolCallChunk(TypedDict):
-    id: str
+    id: Optional[str]
     type: Literal["function"]
     function: ChatCompletionToolCallFunctionChunk
 
 
+class ChatCompletionDeltaToolCallChunk(TypedDict):
+    id: str
+    type: Literal["function"]
+    function: ChatCompletionToolCallFunctionChunk
+    index: int
+
+
+class ChatCompletionDeltaChunk(TypedDict, total=False):
+    content: Optional[str]
+    tool_calls: List[ChatCompletionDeltaToolCallChunk]
+    role: str
+
+
 class ChatCompletionResponseMessage(TypedDict, total=False):
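A quick illustration of how the split types compose for streaming: the delta-side chunk keeps its `index`, while the non-delta chunk relaxes `id`/`name` to `Optional`. Sketch with made-up values:

```python
from litellm.types.llms.openai import (
    ChatCompletionDeltaChunk,
    ChatCompletionDeltaToolCallChunk,
)

# Hypothetical streamed delta: the first tool-call fragment of a response.
delta: ChatCompletionDeltaChunk = {
    "role": "assistant",
    "tool_calls": [
        ChatCompletionDeltaToolCallChunk(
            id="call_abc",  # made-up id
            type="function",
            function={"name": "json", "arguments": ""},
            index=0,
        )
    ],
}
```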
litellm/utils.py (182 changed lines)
@@ -30,7 +30,7 @@ from dataclasses import (
     dataclass,
     field,
 )
 
 import os
 import litellm._service_logger  # for storing API inputs, outputs, and metadata
 from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
 from litellm.caching import DualCache
@@ -49,9 +49,9 @@ except (ImportError, AttributeError):
 
     filename = pkg_resources.resource_filename(__name__, "llms/tokenizers")
 
-    os.environ["TIKTOKEN_CACHE_DIR"] = (
-        filename  # use local copy of tiktoken b/c of - https://github.com/BerriAI/litellm/issues/1071
-    )
+    os.environ["TIKTOKEN_CACHE_DIR"] = os.getenv(
+        "CUSTOM_TIKTOKEN_CACHE_DIR", filename
+    )  # use local copy of tiktoken b/c of - https://github.com/BerriAI/litellm/issues/1071
 
 encoding = tiktoken.get_encoding("cl100k_base")
 from importlib import resources
@@ -63,6 +63,11 @@ claude_json_str = json.dumps(json_data)
 import importlib.metadata
 from ._logging import verbose_logger
 from .types.router import LiteLLM_Params
+from .types.llms.openai import (
+    ChatCompletionToolCallChunk,
+    ChatCompletionToolCallFunctionChunk,
+    ChatCompletionDeltaToolCallChunk,
+)
 from .integrations.traceloop import TraceloopLogger
 from .integrations.athina import AthinaLogger
 from .integrations.helicone import HeliconeLogger
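The new `CUSTOM_TIKTOKEN_CACHE_DIR` override is handy for air-gapped deployments that ship their own tokenizer files. A hedged usage sketch (the path is a placeholder; the env var must be set before `litellm` is imported, since the cache dir is resolved at import time):

```python
import os

# Point litellm at a pre-downloaded tiktoken cache instead of the bundled copy.
os.environ["CUSTOM_TIKTOKEN_CACHE_DIR"] = "/opt/tiktoken_cache"  # placeholder path

import litellm  # noqa: E402  -- must come after the env var is set
```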
@@ -933,7 +938,6 @@ class TextCompletionResponse(OpenAIObject):
         object=None,
         **params,
     ):
-
         if stream:
             object = "text_completion.chunk"
             choices = [TextChoices()]
@@ -942,7 +946,6 @@ class TextCompletionResponse(OpenAIObject):
         if choices is not None and isinstance(choices, list):
             new_choices = []
             for choice in choices:
-
                 if isinstance(choice, TextChoices):
                     _new_choice = choice
                 elif isinstance(choice, dict):
@@ -1018,7 +1021,6 @@ class ImageObject(OpenAIObject):
     revised_prompt: Optional[str] = None
 
     def __init__(self, b64_json=None, url=None, revised_prompt=None):
-
         super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt)
 
     def __contains__(self, key):
@@ -1342,28 +1344,29 @@ class Logging:
                 )
             else:
                 verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n")
-            # log raw request to provider (like LangFuse)
-            try:
-                # [Non-blocking Extra Debug Information in metadata]
-                _litellm_params = self.model_call_details.get("litellm_params", {})
-                _metadata = _litellm_params.get("metadata", {}) or {}
-                if (
-                    litellm.turn_off_message_logging is not None
-                    and litellm.turn_off_message_logging is True
-                ):
-                    _metadata["raw_request"] = (
-                        "redacted by litellm. \
-                        'litellm.turn_off_message_logging=True'"
-                    )
-                else:
-                    _metadata["raw_request"] = str(curl_command)
-            except Exception as e:
-                _metadata["raw_request"] = (
-                    "Unable to Log \
-                    raw request: {}".format(
-                        str(e)
-                    )
-                )
+            # log raw request to provider (like LangFuse) -- if opted in.
+            if litellm.log_raw_request_response is True:
+                try:
+                    # [Non-blocking Extra Debug Information in metadata]
+                    _litellm_params = self.model_call_details.get("litellm_params", {})
+                    _metadata = _litellm_params.get("metadata", {}) or {}
+                    if (
+                        litellm.turn_off_message_logging is not None
+                        and litellm.turn_off_message_logging is True
+                    ):
+                        _metadata["raw_request"] = (
+                            "redacted by litellm. \
+                            'litellm.turn_off_message_logging=True'"
+                        )
+                    else:
+                        _metadata["raw_request"] = str(curl_command)
+                except Exception as e:
+                    _metadata["raw_request"] = (
+                        "Unable to Log \
+                        raw request: {}".format(
+                            str(e)
+                        )
+                    )
             if self.logger_fn and callable(self.logger_fn):
                 try:
                     self.logger_fn(
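Net effect of this hunk: capturing the raw provider request into `metadata["raw_request"]` is now opt-in rather than always-on. A minimal sketch of the two module-level flags involved (flag names taken from the diff above):

```python
import litellm

# Opt in to storing the raw provider request in callback metadata.
litellm.log_raw_request_response = True

# If message logging is turned off, the raw request is redacted instead:
# litellm.turn_off_message_logging = True
```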
@@ -1621,7 +1624,6 @@ class Logging:
                     end_time=end_time,
                 )
             except Exception as e:
-
                 complete_streaming_response = None
         else:
             self.sync_streaming_chunks.append(result)
@@ -2391,7 +2393,6 @@ class Logging:
                 "async_complete_streaming_response"
                 in self.model_call_details
             ):
-
                 await customLogger.async_log_event(
                     kwargs=self.model_call_details,
                     response_obj=self.model_call_details[
@@ -2730,7 +2731,7 @@ class Logging:
         only redacts when litellm.turn_off_message_logging == True
         """
         # check if user opted out of logging message/response to callbacks
-        if litellm.turn_off_message_logging == True:
+        if litellm.turn_off_message_logging is True:
             # remove messages, prompts, input, response from logging
             self.model_call_details["messages"] = [
                 {"role": "user", "content": "redacted-by-litellm"}
@@ -3250,7 +3251,7 @@ def client(original_function):
                     stream=kwargs.get("stream", False),
                 )
 
-                if kwargs.get("stream", False) == True:
+                if kwargs.get("stream", False) is True:
                     cached_result = CustomStreamWrapper(
                         completion_stream=cached_result,
                         model=model,
@@ -4030,7 +4031,10 @@ def openai_token_counter(
     """
     print_verbose(f"LiteLLM: Utils - Counting tokens for OpenAI model={model}")
     try:
-        encoding = tiktoken.encoding_for_model(model)
+        if "gpt-4o" in model:
+            encoding = tiktoken.get_encoding("o200k_base")
+        else:
+            encoding = tiktoken.encoding_for_model(model)
     except KeyError:
         print_verbose("Warning: model not found. Using cl100k_base encoding.")
         encoding = tiktoken.get_encoding("cl100k_base")
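Why the gpt-4o branch matters: gpt-4o uses the `o200k_base` encoding, which tokenizes differently from `cl100k_base`, so falling through to the generic lookup would miscount. A quick check with tiktoken (requires tiktoken>=0.7.0, matching the version bump later in this diff):

```python
import tiktoken

# The two encodings produce different token counts for the same text.
text = "hello world, this is a tokenizer check"
print(len(tiktoken.get_encoding("o200k_base").encode(text)))   # gpt-4o
print(len(tiktoken.get_encoding("cl100k_base").encode(text)))  # gpt-4 / gpt-3.5
```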
@@ -4894,6 +4898,18 @@ def get_optional_params_embeddings(
         )
         final_params = {**optional_params, **kwargs}
         return final_params
+    if custom_llm_provider == "vertex_ai":
+        supported_params = get_supported_openai_params(
+            model=model,
+            custom_llm_provider="vertex_ai",
+            request_type="embeddings",
+        )
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = litellm.VertexAITextEmbeddingConfig().map_openai_params(
+            non_default_params=non_default_params, optional_params={}
+        )
+        final_params = {**optional_params, **kwargs}
+        return final_params
     if custom_llm_provider == "vertex_ai":
         if len(non_default_params.keys()) > 0:
             if litellm.drop_params is True:  # drop the unsupported non-default values
@@ -4927,7 +4943,18 @@ def get_optional_params_embeddings(
             message=f"Setting user/encoding format is not supported by {custom_llm_provider}. To drop it from the call, set `litellm.drop_params = True`.",
         )
         return {**non_default_params, **kwargs}
+    if custom_llm_provider == "mistral":
+        supported_params = get_supported_openai_params(
+            model=model,
+            custom_llm_provider="mistral",
+            request_type="embeddings",
+        )
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = litellm.MistralEmbeddingConfig().map_openai_params(
+            non_default_params=non_default_params, optional_params={}
+        )
+        final_params = {**optional_params, **kwargs}
+        return final_params
     if (
         custom_llm_provider != "openai"
         and custom_llm_provider != "azure"
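With this mapping in place, OpenAI-style embedding params are translated for Mistral instead of being rejected. A hedged usage sketch (assumes `MISTRAL_API_KEY` is set; the model name matches the proxy config later in this diff):

```python
import litellm

# OpenAI-style optional params are now mapped via MistralEmbeddingConfig.
response = litellm.embedding(
    model="mistral/mistral-embed",
    input=["hello world"],
    encoding_format="float",
)
print(response.data[0]["embedding"][:5])
```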
@@ -6166,13 +6193,16 @@ def get_api_base(
     if litellm.model_alias_map and model in litellm.model_alias_map:
         model = litellm.model_alias_map[model]
     try:
-        model, custom_llm_provider, dynamic_api_key, dynamic_api_base = (
-            get_llm_provider(
-                model=model,
-                custom_llm_provider=_optional_params.custom_llm_provider,
-                api_base=_optional_params.api_base,
-                api_key=_optional_params.api_key,
-            )
-        )
+        (
+            model,
+            custom_llm_provider,
+            dynamic_api_key,
+            dynamic_api_base,
+        ) = get_llm_provider(
+            model=model,
+            custom_llm_provider=_optional_params.custom_llm_provider,
+            api_base=_optional_params.api_base,
+            api_key=_optional_params.api_key,
+        )
     except Exception as e:
         verbose_logger.debug("Error occurred in getting api base - {}".format(str(e)))
@@ -6220,7 +6250,7 @@ def get_first_chars_messages(kwargs: dict) -> str:
 
 def get_supported_openai_params(
     model: str,
-    custom_llm_provider: str,
+    custom_llm_provider: Optional[str] = None,
     request_type: Literal["chat_completion", "embeddings"] = "chat_completion",
 ) -> Optional[list]:
     """
@@ -6235,6 +6265,11 @@ def get_supported_openai_params(
     - List if custom_llm_provider is mapped
     - None if unmapped
     """
+    if not custom_llm_provider:
+        try:
+            custom_llm_provider = litellm.get_llm_provider(model=model)[1]
+        except BadRequestError:
+            return None
     if custom_llm_provider == "bedrock":
         return litellm.AmazonConverseConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "ollama":
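Usage now mirrors the new unit test: the provider argument can be omitted and is inferred from the model name, and unmapped models return `None` instead of raising. Sketch:

```python
from litellm.utils import get_supported_openai_params

# Provider inferred from the model name:
params = get_supported_openai_params(model="gpt-4")
assert isinstance(params, list)

# request_type selects the embedding param map where one exists:
embed_params = get_supported_openai_params(
    model="mistral-embed", custom_llm_provider="mistral", request_type="embeddings"
)
print(embed_params)

# Unmapped models return None instead of raising:
assert get_supported_openai_params(model="nonexistent") is None
```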
@@ -6328,7 +6363,10 @@ def get_supported_openai_params(
             "max_retries",
         ]
     elif custom_llm_provider == "mistral":
-        return litellm.MistralConfig().get_supported_openai_params()
+        if request_type == "chat_completion":
+            return litellm.MistralConfig().get_supported_openai_params()
+        elif request_type == "embeddings":
+            return litellm.MistralEmbeddingConfig().get_supported_openai_params()
     elif custom_llm_provider == "replicate":
         return [
             "stream",
@@ -6370,7 +6408,10 @@ def get_supported_openai_params(
     elif custom_llm_provider == "palm" or custom_llm_provider == "gemini":
         return ["temperature", "top_p", "stream", "n", "stop", "max_tokens"]
     elif custom_llm_provider == "vertex_ai":
-        return litellm.VertexAIConfig().get_supported_openai_params()
+        if request_type == "chat_completion":
+            return litellm.VertexAIConfig().get_supported_openai_params()
+        elif request_type == "embeddings":
+            return litellm.VertexAITextEmbeddingConfig().get_supported_openai_params()
     elif custom_llm_provider == "sagemaker":
         return ["stream", "temperature", "max_tokens", "top_p", "stop", "n"]
     elif custom_llm_provider == "aleph_alpha":
@@ -6577,6 +6618,9 @@ def get_llm_provider(
             or get_secret("FIREWORKSAI_API_KEY")
             or get_secret("FIREWORKS_AI_TOKEN")
         )
+    elif custom_llm_provider == "azure_ai":
+        api_base = api_base or get_secret("AZURE_AI_API_BASE")  # type: ignore
+        dynamic_api_key = api_key or get_secret("AZURE_AI_API_KEY")
     elif custom_llm_provider == "mistral":
         # mistral is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.mistral.ai
         api_base = (
@@ -7458,7 +7502,6 @@ def validate_environment(model: Optional[str] = None) -> dict:
 
 
 def set_callbacks(callback_list, function_id=None):
-
     global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger
 
     try:
@@ -8767,6 +8810,13 @@ def exception_type(
                     response=original_exception.response,
                     litellm_debug_info=extra_information,
                 )
+            if "Request failed during generation" in error_str:
+                # this is an internal server error from predibase
+                raise litellm.InternalServerError(
+                    message=f"PredibaseException - {error_str}",
+                    llm_provider="predibase",
+                    model=model,
+                )
             elif hasattr(original_exception, "status_code"):
                 if original_exception.status_code == 500:
                     exception_mapping_worked = True
@@ -9085,7 +9135,7 @@ def exception_type(
             ):
                 exception_mapping_worked = True
                 raise RateLimitError(
-                    message=f"VertexAIException RateLimitError - {error_str}",
+                    message=f"litellm.RateLimitError: VertexAIException - {error_str}",
                     model=model,
                     llm_provider="vertex_ai",
                     litellm_debug_info=extra_information,
@@ -9097,7 +9147,14 @@ def exception_type(
                         ),
                     ),
                 )
+            elif "500 Internal Server Error" in error_str:
+                exception_mapping_worked = True
+                raise ServiceUnavailableError(
+                    message=f"litellm.ServiceUnavailableError: VertexAIException - {error_str}",
+                    model=model,
+                    llm_provider="vertex_ai",
+                    litellm_debug_info=extra_information,
+                )
             if hasattr(original_exception, "status_code"):
                 if original_exception.status_code == 400:
                     exception_mapping_worked = True
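For callers, the practical effect is that Vertex 500s and Predibase generation failures now surface as typed, retryable exceptions instead of generic errors. A hedged sketch of the consumer side (model name is just an example):

```python
import litellm

try:
    litellm.completion(
        model="vertex_ai/gemini-pro",
        messages=[{"role": "user", "content": "hi"}],
    )
except litellm.ServiceUnavailableError as e:
    # Vertex "500 Internal Server Error" lands here after this change.
    print("retryable provider outage:", e.llm_provider)
except litellm.InternalServerError as e:
    # Predibase "Request failed during generation" lands here.
    print("internal server error:", e.llm_provider)
```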
@@ -10048,6 +10105,14 @@ def get_secret(
                 return oidc_token
             else:
                 raise ValueError("Github OIDC provider failed")
+        elif oidc_provider == "azure":
+            # https://azure.github.io/azure-workload-identity/docs/quick-start.html
+            azure_federated_token_file = os.getenv("AZURE_FEDERATED_TOKEN_FILE")
+            if azure_federated_token_file is None:
+                raise ValueError("AZURE_FEDERATED_TOKEN_FILE not found in environment")
+            with open(azure_federated_token_file, "r") as f:
+                oidc_token = f.read()
+            return oidc_token
         else:
             raise ValueError("Unsupported OIDC provider")
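The Azure branch mirrors the workload-identity quick-start linked above: the projected service-account token is read from the file that `AZURE_FEDERATED_TOKEN_FILE` points at. Standalone sketch of that logic, lifted straight from the hunk:

```python
import os

# Azure workload identity projects a short-lived OIDC token into the pod;
# AZURE_FEDERATED_TOKEN_FILE holds the path to that file.
token_file = os.getenv("AZURE_FEDERATED_TOKEN_FILE")
if token_file is None:
    raise ValueError("AZURE_FEDERATED_TOKEN_FILE not found in environment")
with open(token_file, "r") as f:
    oidc_token = f.read()
```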
@@ -11311,7 +11376,6 @@ class CustomStreamWrapper:
                     raise StopIteration
                 response_obj: GenericStreamingChunk = chunk
                 completion_obj["content"] = response_obj["text"]
-
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
 
@@ -11326,6 +11390,10 @@ class CustomStreamWrapper:
                         completion_tokens=response_obj["usage"]["outputTokens"],
                         total_tokens=response_obj["usage"]["totalTokens"],
                     )
+
+                if "tool_use" in response_obj and response_obj["tool_use"] is not None:
+                    completion_obj["tool_calls"] = [response_obj["tool_use"]]
+
             elif self.custom_llm_provider == "sagemaker":
                 print_verbose(f"ENTERS SAGEMAKER STREAMING for chunk {chunk}")
                 response_obj = self.handle_sagemaker_stream(chunk)
@@ -11342,7 +11410,6 @@ class CustomStreamWrapper:
                 new_chunk = self.completion_stream[:chunk_size]
                 completion_obj["content"] = new_chunk
                 self.completion_stream = self.completion_stream[chunk_size:]
-                time.sleep(0.05)
             elif self.custom_llm_provider == "palm":
                 # fake streaming
                 response_obj = {}
@@ -11355,7 +11422,6 @@ class CustomStreamWrapper:
                 new_chunk = self.completion_stream[:chunk_size]
                 completion_obj["content"] = new_chunk
                 self.completion_stream = self.completion_stream[chunk_size:]
-                time.sleep(0.05)
             elif self.custom_llm_provider == "ollama":
                 response_obj = self.handle_ollama_stream(chunk)
                 completion_obj["content"] = response_obj["text"]
@@ -11442,7 +11508,7 @@ class CustomStreamWrapper:
                 # for azure, we need to pass the model from the orignal chunk
                 self.model = chunk.model
                 response_obj = self.handle_openai_chat_completion_chunk(chunk)
-                if response_obj == None:
+                if response_obj is None:
                     return
                 completion_obj["content"] = response_obj["text"]
                 print_verbose(f"completion obj content: {completion_obj['content']}")
@@ -11575,7 +11641,7 @@ class CustomStreamWrapper:
                 else:
                     if (
                         self.stream_options is not None
-                        and self.stream_options["include_usage"] == True
+                        and self.stream_options["include_usage"] is True
                     ):
                         return model_response
                     return
@@ -11600,8 +11666,14 @@ class CustomStreamWrapper:
                 return model_response
             elif (
                 "content" in completion_obj
-                and isinstance(completion_obj["content"], str)
-                and len(completion_obj["content"]) > 0
+                and (
+                    isinstance(completion_obj["content"], str)
+                    and len(completion_obj["content"]) > 0
+                )
+                or (
+                    "tool_calls" in completion_obj
+                    and len(completion_obj["tool_calls"]) > 0
+                )
             ):  # cannot set content of an OpenAI Object to be an empty string
                 hold, model_response_str = self.check_special_tokens(
                     chunk=completion_obj["content"],
@@ -11657,7 +11729,7 @@ class CustomStreamWrapper:
                 else:
                     ## else
                     completion_obj["content"] = model_response_str
-                    if self.sent_first_chunk == False:
+                    if self.sent_first_chunk is False:
                         completion_obj["role"] = "assistant"
                         self.sent_first_chunk = True
                     model_response.choices[0].delta = Delta(**completion_obj)
@@ -11666,7 +11738,7 @@ class CustomStreamWrapper:
                 else:
                     return
             elif self.received_finish_reason is not None:
-                if self.sent_last_chunk == True:
+                if self.sent_last_chunk is True:
                     raise StopIteration
                 # flush any remaining holding chunk
                 if len(self.holding_chunk) > 0:
@@ -1387,6 +1387,26 @@
         "mode": "image_generation",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
     },
+    "text-embedding-004": {
+        "max_tokens": 3072,
+        "max_input_tokens": 3072,
+        "output_vector_size": 768,
+        "input_cost_per_token": 0.00000000625,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
+    },
+    "text-multilingual-embedding-002": {
+        "max_tokens": 2048,
+        "max_input_tokens": 2048,
+        "output_vector_size": 768,
+        "input_cost_per_token": 0.00000000625,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
+    },
     "textembedding-gecko": {
         "max_tokens": 3072,
         "max_input_tokens": 3072,
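Sanity check on the new Vertex embedding entries: at the quoted per-token rate, a million input tokens costs well under a cent.

```python
# 0.00000000625 USD per input token, per the entries above.
cost_per_token = 0.00000000625
print(f"${1_000_000 * cost_per_token:.5f} per 1M input tokens")  # $0.00625
```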
poetry.lock (50 changed lines, generated)
@@ -1545,6 +1545,53 @@ files = [
     {file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"},
 ]
 
+[[package]]
+name = "mypy"
+version = "1.10.0"
+description = "Optional static typing for Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "mypy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2"},
+    {file = "mypy-1.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99"},
+    {file = "mypy-1.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e36fb078cce9904c7989b9693e41cb9711e0600139ce3970c6ef814b6ebc2b2"},
+    {file = "mypy-1.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2b0695d605ddcd3eb2f736cd8b4e388288c21e7de85001e9f85df9187f2b50f9"},
+    {file = "mypy-1.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:cd777b780312ddb135bceb9bc8722a73ec95e042f911cc279e2ec3c667076051"},
+    {file = "mypy-1.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3be66771aa5c97602f382230165b856c231d1277c511c9a8dd058be4784472e1"},
+    {file = "mypy-1.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8b2cbaca148d0754a54d44121b5825ae71868c7592a53b7292eeb0f3fdae95ee"},
+    {file = "mypy-1.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ec404a7cbe9fc0e92cb0e67f55ce0c025014e26d33e54d9e506a0f2d07fe5de"},
+    {file = "mypy-1.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e22e1527dc3d4aa94311d246b59e47f6455b8729f4968765ac1eacf9a4760bc7"},
+    {file = "mypy-1.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:a87dbfa85971e8d59c9cc1fcf534efe664d8949e4c0b6b44e8ca548e746a8d53"},
+    {file = "mypy-1.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a781f6ad4bab20eef8b65174a57e5203f4be627b46291f4589879bf4e257b97b"},
+    {file = "mypy-1.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b808e12113505b97d9023b0b5e0c0705a90571c6feefc6f215c1df9381256e30"},
+    {file = "mypy-1.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f55583b12156c399dce2df7d16f8a5095291354f1e839c252ec6c0611e86e2e"},
+    {file = "mypy-1.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4cf18f9d0efa1b16478c4c129eabec36148032575391095f73cae2e722fcf9d5"},
+    {file = "mypy-1.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:bc6ac273b23c6b82da3bb25f4136c4fd42665f17f2cd850771cb600bdd2ebeda"},
+    {file = "mypy-1.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9fd50226364cd2737351c79807775136b0abe084433b55b2e29181a4c3c878c0"},
+    {file = "mypy-1.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f90cff89eea89273727d8783fef5d4a934be2fdca11b47def50cf5d311aff727"},
+    {file = "mypy-1.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcfc70599efde5c67862a07a1aaf50e55bce629ace26bb19dc17cece5dd31ca4"},
+    {file = "mypy-1.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:075cbf81f3e134eadaf247de187bd604748171d6b79736fa9b6c9685b4083061"},
+    {file = "mypy-1.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:3f298531bca95ff615b6e9f2fc0333aae27fa48052903a0ac90215021cdcfa4f"},
+    {file = "mypy-1.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fa7ef5244615a2523b56c034becde4e9e3f9b034854c93639adb667ec9ec2976"},
+    {file = "mypy-1.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3236a4c8f535a0631f85f5fcdffba71c7feeef76a6002fcba7c1a8e57c8be1ec"},
+    {file = "mypy-1.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a2b5cdbb5dd35aa08ea9114436e0d79aceb2f38e32c21684dcf8e24e1e92821"},
+    {file = "mypy-1.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92f93b21c0fe73dc00abf91022234c79d793318b8a96faac147cd579c1671746"},
+    {file = "mypy-1.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:28d0e038361b45f099cc086d9dd99c15ff14d0188f44ac883010e172ce86c38a"},
+    {file = "mypy-1.10.0-py3-none-any.whl", hash = "sha256:f8c083976eb530019175aabadb60921e73b4f45736760826aa1689dda8208aee"},
+    {file = "mypy-1.10.0.tar.gz", hash = "sha256:3d087fcbec056c4ee34974da493a826ce316947485cef3901f511848e687c131"},
+]
+
+[package.dependencies]
+mypy-extensions = ">=1.0.0"
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = ">=4.1.0"
+
+[package.extras]
+dmypy = ["psutil (>=4.0)"]
+install-types = ["pip"]
+mypyc = ["setuptools (>=50)"]
+reports = ["lxml"]
+
 [[package]]
 name = "mypy-extensions"
 version = "1.0.0"
@@ -2127,6 +2174,7 @@ files = [
     {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
     {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
     {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
     {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@@ -3150,4 +3198,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-
 
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0, !=3.9.7"
-content-hash = "6a37992b63b11d254f5f40687bd96898b1d9515728f663f30dcc81c4ef8df7b7"
+content-hash = "73054c657782120d170dc168ef07b494a916f1f810ff9c2b0ac878bd857a9dac"
@@ -85,6 +85,9 @@ model_list:
     litellm_params:
       model: openai/*
       api_key: os.environ/OPENAI_API_KEY
+  - model_name: mistral-embed
+    litellm_params:
+      model: mistral/mistral-embed
   - model_name: gpt-instruct # [PROD TEST] - tests if `/health` automatically infers this to be a text completion model
     litellm_params:
       model: text-completion-openai/gpt-3.5-turbo-instruct
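Once this config is loaded, the new `mistral-embed` alias is reachable through any OpenAI-compatible client. Hedged sketch against a local proxy (the base URL and key are the usual test placeholders, matching the proxy tests later in this diff):

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
embedding = client.embeddings.create(model="mistral-embed", input=["hello world"])
print(len(embedding.data[0].embedding))
```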
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.40.8"
+version = "1.40.9"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -19,7 +19,7 @@ documentation = "https://docs.litellm.ai"
 python = ">=3.8.1,<4.0, !=3.9.7"
 openai = ">=1.27.0"
 python-dotenv = ">=0.2.0"
-tiktoken = ">=0.4.0"
+tiktoken = ">=0.7.0"
 importlib-metadata = ">=6.8.0"
 tokenizers = "*"
 click = "*"
@@ -76,6 +76,7 @@ litellm = 'litellm:run_server'
 [tool.poetry.group.dev.dependencies]
 flake8 = "^6.1.0"
 black = "^23.12.0"
+mypy = "^1.0"
 pytest = "^7.4.3"
 pytest-mock = "^3.12.0"
 
@@ -84,7 +85,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.40.8"
+version = "1.40.9"
 version_files = [
     "pyproject.toml:^version"
 ]
@@ -34,7 +34,7 @@ opentelemetry-exporter-otlp==1.25.0
 
 ### LITELLM PACKAGE DEPENDENCIES
 python-dotenv==1.0.0 # for env
-tiktoken==0.6.0 # for calculating usage
+tiktoken==0.7.0 # for calculating usage
 importlib-metadata==6.8.0 # for random utils
 tokenizers==0.14.0 # for calculating usage
 click==8.1.7 # for proxy cli
@@ -91,7 +91,7 @@ model LiteLLM_TeamTable {
     updated_at DateTime @default(now()) @updatedAt @map("updated_at")
     model_spend Json @default("{}")
     model_max_budget Json @default("{}")
-    model_id Int? @unique
+    model_id Int? @unique // id for LiteLLM_ModelTable -> stores team-level model aliases
     litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
     litellm_model_table LiteLLM_ModelTable? @relation(fields: [model_id], references: [id])
 }
@@ -664,7 +664,7 @@ async def test_key_crossing_budget():
         response = await chat_completion(session=session, key=key)
         pytest.fail("Should have failed - Key crossed it's budget")
     except Exception as e:
-        assert "ExceededTokenBudget: Current spend for token:" in str(e)
+        assert "Budget has been exceeded!" in str(e)
 
 
 @pytest.mark.skip(reason="AWS Suspended Account")
@@ -22,6 +22,7 @@ async def generate_key(
         "text-embedding-ada-002",
         "dall-e-2",
         "fake-openai-endpoint-2",
+        "mistral-embed",
     ],
 ):
     url = "http://0.0.0.0:4000/key/generate"
@@ -197,14 +198,14 @@ async def completion(session, key):
     return response
 
 
-async def embeddings(session, key):
+async def embeddings(session, key, model="text-embedding-ada-002"):
     url = "http://0.0.0.0:4000/embeddings"
     headers = {
         "Authorization": f"Bearer {key}",
         "Content-Type": "application/json",
     }
     data = {
-        "model": "text-embedding-ada-002",
+        "model": model,
         "input": ["hello world"],
     }
 
@@ -408,6 +409,9 @@ async def test_embeddings():
         key_2 = key_gen["key"]
         await embeddings(session=session, key=key_2)
 
+        # embedding request with non OpenAI model
+        await embeddings(session=session, key=key, model="mistral-embed")
+
 
 @pytest.mark.asyncio
 async def test_image_generation():
@@ -49,7 +49,7 @@ async def new_user(
 
 
 async def add_member(
-    session, i, team_id, user_id=None, user_email=None, max_budget=None
+    session, i, team_id, user_id=None, user_email=None, max_budget=None, members=None
 ):
     url = "http://0.0.0.0:4000/team/member_add"
     headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
@@ -58,10 +58,13 @@ async def add_member(
         data["member"]["user_email"] = user_email
     elif user_id is not None:
         data["member"]["user_id"] = user_id
+    elif members is not None:
+        data["member"] = members
 
     if max_budget is not None:
         data["max_budget_in_team"] = max_budget
 
+    print("sent data: {}".format(data))
     async with session.post(url, headers=headers, json=data) as response:
         status = response.status
         response_text = await response.text()
@@ -339,7 +342,7 @@ async def test_team_info():
 async def test_team_update_sc_2():
     """
     - Create team
-    - Add 1 user (doesn't exist in db)
+    - Add 3 users (doesn't exist in db)
     - Change team alias
     - Check if it works
     - Assert team object unchanged besides team alias
@@ -353,15 +356,20 @@ async def test_team_update_sc_2():
             {"role": "admin", "user_id": admin_user},
         ]
         team_data = await new_team(session=session, i=0, member_list=member_list)
-        ## Create new normal user
-        new_normal_user = f"krrish_{uuid.uuid4()}@berri.ai"
+        ## Create 10 normal users
+        members = [
+            {"role": "user", "user_id": f"krrish_{uuid.uuid4()}@berri.ai"}
+            for _ in range(10)
+        ]
         await add_member(
-            session=session,
-            i=0,
-            team_id=team_data["team_id"],
-            user_id=None,
-            user_email=new_normal_user,
+            session=session, i=0, team_id=team_data["team_id"], members=members
         )
+        ## ASSERT TEAM SIZE
+        team_info = await get_team_info(
+            session=session, get_team=team_data["team_id"], call_key="sk-1234"
+        )
+
+        assert len(team_info["team_info"]["members_with_roles"]) == 12
+
         ## CHANGE TEAM ALIAS
@@ -570,4 +578,4 @@ async def test_users_in_team_budget():
     except Exception as e:
         print("got exception, this is expected")
         print(e)
-        assert "Crossed spend within team" in str(e)
+        assert "Budget has been exceeded" in str(e)
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[45980,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-17b0c91edd3a24fe.js\",\"931\",\"static/chunks/app/page-d61796ff0d3a8faf.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"tghLG7_IS7i5OkQJRvCIl\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d12f0c7c134d3e60.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[45980,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-17b0c91edd3a24fe.js\",\"931\",\"static/chunks/app/page-bd882aee817406ff.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f02cb03d96e276ef.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"48nWsJi-LJrUlOLzcK-Yz\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid 
rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
@@ -1,7 +1,7 @@
 2:I[77831,[],""]
-3:I[45980,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-17b0c91edd3a24fe.js","931","static/chunks/app/page-d61796ff0d3a8faf.js"],""]
+3:I[45980,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-17b0c91edd3a24fe.js","931","static/chunks/app/page-bd882aee817406ff.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
-0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
+0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null
File diff suppressed because one or more lines are too long
@@ -2,6 +2,6 @@
 3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-17b0c91edd3a24fe.js","418","static/chunks/app/model_hub/page-4cb65c32467214b5.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
-0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
+0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null
File diff suppressed because one or more lines are too long
@@ -2,6 +2,6 @@
 3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-17b0c91edd3a24fe.js","461","static/chunks/app/onboarding/page-664c7288e11fff5a.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
-0:["tghLG7_IS7i5OkQJRvCIl",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
+0:["48nWsJi-LJrUlOLzcK-Yz",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f02cb03d96e276ef.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null
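The three payload diffs above are prerendered Next.js App Router flight files regenerated by the UI build: apart from one page-chunk hash (page-d61796ff0d3a8faf.js → page-bd882aee817406ff.js), the only change is the embedded build ID (tghLG7_IS7i5OkQJRvCIl → 48nWsJi-LJrUlOLzcK-Yz), serialized as the first string of each file's `0:` entry. A minimal sketch of how one could confirm that mechanically — not part of this commit, and the `out/` file paths are assumptions:

```typescript
// Sketch, assuming the prerendered payload files live under out/.
// The Next.js build ID is the first string of the `0:` entry,
// e.g. 0:["48nWsJi-LJrUlOLzcK-Yz",[[...
import { readFileSync } from "node:fs";

function buildIdOf(payloadPath: string): string | undefined {
  const text = readFileSync(payloadPath, "utf8");
  return text.match(/^0:\["([^"]+)"/m)?.[1];
}

for (const page of ["index.txt", "model_hub.txt", "onboarding.txt"]) {
  console.log(page, buildIdOf(`out/${page}`)); // all three should print the same build ID
}
```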
@@ -119,9 +119,24 @@ const ChatUI: React.FC<ChatUIProps> = ({
         // Now, 'options' contains the list you wanted
         console.log(options); // You can log it to verify the list

-        // setModelInfo(options) should be inside the if block to avoid setting it when no data is available
-        setModelInfo(options);
+        // if options.length > 0, only store unique values
+        if (options.length > 0) {
+          const uniqueModels = Array.from(new Set(options));
+
+          console.log("Unique models:", uniqueModels);
+
+          // sort uniqueModels alphabetically
+          uniqueModels.sort((a: any, b: any) => a.label.localeCompare(b.label));
+
+          console.log("Model info:", modelInfo);
+
+          // setModelInfo(options) should be inside the if block to avoid setting it when no data is available
+          setModelInfo(uniqueModels);
+        }

         setSelectedModel(fetchedAvailableModels.data[0].id);
       }
     } catch (error) {
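One caveat with the new block above: `Array.from(new Set(options))` removes duplicates by object reference, so if `options` holds freshly built `{ label, value }` objects, two entries with the same label both survive. A label-keyed dedupe avoids that — a minimal sketch, not part of this commit, with the `Option` shape assumed for illustration:

```typescript
// Hypothetical helper, not part of this commit: dedupe option objects by label.
type Option = { label: string; value: string };

function dedupeByLabel(options: Option[]): Option[] {
  const byLabel = new Map<string, Option>();
  for (const opt of options) {
    if (!byLabel.has(opt.label)) byLabel.set(opt.label, opt); // keep first occurrence
  }
  // sort alphabetically by label, matching the behavior in the hunk above
  return Array.from(byLabel.values()).sort((a, b) => a.label.localeCompare(b.label));
}
```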
@@ -1130,7 +1130,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
                 setSelectedAPIKey(key);
               }}
             >
-              ✨ {key["key_alias"]} (Enterpise only Feature)
+              ✨ {key["key_alias"]} (Enterprise only Feature)
             </SelectItem>
           );
         }
@@ -1165,7 +1165,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
                 setSelectedCustomer(user);
               }}
             >
-              ✨ {user} (Enterpise only Feature)
+              ✨ {user} (Enterprise only Feature)
             </SelectItem>
           );
         })
@@ -114,7 +114,7 @@ const Navbar: React.FC<NavbarProps> = ({
                 textDecoration: "underline",
               }}
             >
-              Get enterpise license
+              Get enterprise license
             </a>
           </div>
         ) : null}
@@ -832,7 +832,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
               // @ts-ignore
               disabled={true}
             >
-              ✨ {tag} (Enterpise only Feature)
+              ✨ {tag} (Enterprise only Feature)
             </SelectItem>
           );
         })}