Merge branch 'main' into litellm_fix_caching_reasoning
.gitignore (vendored, 1 change)
@ -86,3 +86,4 @@ litellm/proxy/db/migrations/0_init/migration.sql
litellm/proxy/db/migrations/*
litellm/proxy/migrations/*config.yaml
litellm/proxy/migrations/*
+tests/litellm/litellm_core_utils/llm_cost_calc/log.txt
@ -4,7 +4,7 @@ Pass-through endpoints for Cohere - call provider-specific endpoint, in native f

| Feature | Supported | Notes |
|-------|-------|-------|
-| Cost Tracking | ✅ | works across all integrations |
+| Cost Tracking | ✅ | Supported for `/v1/chat`, and `/v2/chat` |
| Logging | ✅ | works across all integrations |
| End-user Tracking | ❌ | [Tell us if you need this](https://github.com/BerriAI/litellm/issues/new) |
| Streaming | ✅ | |

docs/my-website/docs/pass_through/mistral.md (new file, 217 lines)
@ -0,0 +1,217 @@
|
||||||
|
# Mistral
|
||||||
|
|
||||||
|
Pass-through endpoints for Mistral - call provider-specific endpoint, in native format (no translation).
|
||||||
|
|
||||||
|
| Feature | Supported | Notes |
|
||||||
|
|-------|-------|-------|
|
||||||
|
| Cost Tracking | ❌ | Not supported |
|
||||||
|
| Logging | ✅ | works across all integrations |
|
||||||
|
| End-user Tracking | ❌ | [Tell us if you need this](https://github.com/BerriAI/litellm/issues/new) |
|
||||||
|
| Streaming | ✅ | |
|
||||||
|
|
||||||
|
Just replace `https://api.mistral.ai/v1` with `LITELLM_PROXY_BASE_URL/mistral` 🚀
|
||||||
|
|
||||||
|
#### **Example Usage**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/mistral/v1/ocr' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-d '{
|
||||||
|
"model": "mistral-ocr-latest",
|
||||||
|
"document": {
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": "https://raw.githubusercontent.com/mistralai/cookbook/refs/heads/main/mistral/ocr/receipt.png"
|
||||||
|
}
|
||||||
|
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Supports **ALL** Mistral Endpoints (including streaming).
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
Let's call the Mistral [`/chat/completions` endpoint](https://docs.mistral.ai/api/#tag/chat/operation/chat_completion_v1_chat_completions_post)
|
||||||
|
|
||||||
|
1. Add MISTRAL_API_KEY to your environment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export MISTRAL_API_KEY="sk-1234"
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start LiteLLM Proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm
|
||||||
|
|
||||||
|
# RUNNING on http://0.0.0.0:4000
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
Let's call the Mistral `/ocr` endpoint
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/mistral/v1/ocr' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-d '{
|
||||||
|
"model": "mistral-ocr-latest",
|
||||||
|
"document": {
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": "https://raw.githubusercontent.com/mistralai/cookbook/refs/heads/main/mistral/ocr/receipt.png"
|
||||||
|
}
|
||||||
|
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
Anything after `http://0.0.0.0:4000/mistral` is treated as a provider-specific route, and handled accordingly.
|
||||||
|
|
||||||
|
Key Changes:
|
||||||
|
|
||||||
|
| **Original Endpoint** | **Replace With** |
|
||||||
|
|------------------------------------------------------|-----------------------------------|
|
||||||
|
| `https://api.mistral.ai/v1` | `http://0.0.0.0:4000/mistral` (LITELLM_PROXY_BASE_URL="http://0.0.0.0:4000") |
|
||||||
|
| `bearer $MISTRAL_API_KEY` | `bearer anything` (use `bearer LITELLM_VIRTUAL_KEY` if Virtual Keys are setup on proxy) |
|
||||||
|
|
||||||
|
|
||||||
|
### **Example 1: OCR endpoint**
|
||||||
|
|
||||||
|
#### LiteLLM Proxy Call
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/mistral/v1/ocr' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer $LITELLM_API_KEY' \
|
||||||
|
-d '{
|
||||||
|
"model": "mistral-ocr-latest",
|
||||||
|
"document": {
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": "https://raw.githubusercontent.com/mistralai/cookbook/refs/heads/main/mistral/ocr/receipt.png"
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
#### Direct Mistral API Call
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl https://api.mistral.ai/v1/ocr \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer ${MISTRAL_API_KEY}" \
|
||||||
|
-d '{
|
||||||
|
"model": "mistral-ocr-latest",
|
||||||
|
"document": {
|
||||||
|
"type": "document_url",
|
||||||
|
"document_url": "https://arxiv.org/pdf/2201.04234"
|
||||||
|
},
|
||||||
|
"include_image_base64": true
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Example 2: Chat API**
|
||||||
|
|
||||||
|
#### LiteLLM Proxy Call
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/mistral/v1/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
|
||||||
|
-d '{
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "I am going to Paris, what should I see?"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"max_tokens": 2048,
|
||||||
|
"temperature": 0.8,
|
||||||
|
"top_p": 0.1,
|
||||||
|
"model": "mistral-large-latest",
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Direct Mistral API Call
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'https://api.mistral.ai/v1/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
-H "Authorization: Bearer ${MISTRAL_API_KEY}" \
|
||||||
|
-d '{
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "I am going to Paris, what should I see?"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"max_tokens": 2048,
|
||||||
|
"temperature": 0.8,
|
||||||
|
"top_p": 0.1,
|
||||||
|
"model": "mistral-large-latest",
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Advanced - Use with Virtual Keys
|
||||||
|
|
||||||
|
Pre-requisites
|
||||||
|
- [Setup proxy with DB](../proxy/virtual_keys.md#setup)
|
||||||
|
|
||||||
|
Use this to avoid giving developers the raw Mistral API key while still letting them use Mistral endpoints.
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
1. Setup environment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export DATABASE_URL=""
|
||||||
|
export LITELLM_MASTER_KEY=""
|
||||||
|
export MISTRAL_API_BASE=""
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm
|
||||||
|
|
||||||
|
# RUNNING on http://0.0.0.0:4000
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Generate virtual key
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST 'http://0.0.0.0:4000/key/generate' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected Response
|
||||||
|
|
||||||
|
```bash
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"key": "sk-1234ewknldferwedojwojw"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/mistral/v1/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234ewknldferwedojwojw' \
|
||||||
|
--data '{
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "I am going to Paris, what should I see?"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"max_tokens": 2048,
|
||||||
|
"temperature": 0.8,
|
||||||
|
"top_p": 0.1,
|
||||||
|
"model": "qwen2.5-7b-instruct",
|
||||||
|
}'
|
||||||
|
```
|
docs/my-website/docs/pass_through/vllm.md (new file, 185 lines)
@ -0,0 +1,185 @@
|
||||||
|
# VLLM
|
||||||
|
|
||||||
|
Pass-through endpoints for VLLM - call provider-specific endpoint, in native format (no translation).
|
||||||
|
|
||||||
|
| Feature | Supported | Notes |
|
||||||
|
|-------|-------|-------|
|
||||||
|
| Cost Tracking | ❌ | Not supported |
|
||||||
|
| Logging | ✅ | works across all integrations |
|
||||||
|
| End-user Tracking | ❌ | [Tell us if you need this](https://github.com/BerriAI/litellm/issues/new) |
|
||||||
|
| Streaming | ✅ | |
|
||||||
|
|
||||||
|
Just replace `https://my-vllm-server.com` with `LITELLM_PROXY_BASE_URL/vllm` 🚀
|
||||||
|
|
||||||
|
#### **Example Usage**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X GET 'http://0.0.0.0:4000/vllm/metrics' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234'
|
||||||
|
```
|
||||||
|
|
||||||
|
Supports **ALL** VLLM Endpoints (including streaming).
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
Let's call the VLLM [`/metrics` endpoint](https://vllm.readthedocs.io/en/latest/api_reference/api_reference.html)
|
||||||
|
|
||||||
|
1. Add HOSTED_VLLM_API_BASE to your environment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export HOSTED_VLLM_API_BASE="https://my-vllm-server.com"
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start LiteLLM Proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm
|
||||||
|
|
||||||
|
# RUNNING on http://0.0.0.0:4000
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
Let's call the VLLM `/metrics` endpoint
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X GET 'http://0.0.0.0:4000/vllm/metrics' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
Anything after `http://0.0.0.0:4000/vllm` is treated as a provider-specific route, and handled accordingly.
|
||||||
|
|
||||||
|
Key Changes:
|
||||||
|
|
||||||
|
| **Original Endpoint** | **Replace With** |
|
||||||
|
|------------------------------------------------------|-----------------------------------|
|
||||||
|
| `https://my-vllm-server.com` | `http://0.0.0.0:4000/vllm` (LITELLM_PROXY_BASE_URL="http://0.0.0.0:4000") |
|
||||||
|
| `bearer $VLLM_API_KEY` | `bearer anything` (use `bearer LITELLM_VIRTUAL_KEY` if Virtual Keys are setup on proxy) |
|
||||||
|
|
||||||
|
|
||||||
|
### **Example 1: Metrics endpoint**
|
||||||
|
|
||||||
|
#### LiteLLM Proxy Call
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X GET 'http://0.0.0.0:4000/vllm/metrics' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
#### Direct VLLM API Call
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X GET 'https://my-vllm-server.com/metrics' \
|
||||||
|
-H 'Content-Type: application/json'
|
||||||
|
```
|
||||||
|
|
||||||
|
### **Example 2: Chat API**
|
||||||
|
|
||||||
|
#### LiteLLM Proxy Call
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/vllm/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
|
||||||
|
-d '{
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "I am going to Paris, what should I see?"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"max_tokens": 2048,
|
||||||
|
"temperature": 0.8,
|
||||||
|
"top_p": 0.1,
|
||||||
|
"model": "qwen2.5-7b-instruct",
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Direct VLLM API Call
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'https://my-vllm-server.com/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "I am going to Paris, what should I see?"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"max_tokens": 2048,
|
||||||
|
"temperature": 0.8,
|
||||||
|
"top_p": 0.1,
|
||||||
|
"model": "qwen2.5-7b-instruct",
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Advanced - Use with Virtual Keys
|
||||||
|
|
||||||
|
Pre-requisites
|
||||||
|
- [Setup proxy with DB](../proxy/virtual_keys.md#setup)
|
||||||
|
|
||||||
|
Use this to avoid giving developers the raw VLLM API key while still letting them use VLLM endpoints.
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
1. Setup environment
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export DATABASE_URL=""
|
||||||
|
export LITELLM_MASTER_KEY=""
|
||||||
|
export HOSTED_VLLM_API_BASE=""
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm
|
||||||
|
|
||||||
|
# RUNNING on http://0.0.0.0:4000
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Generate virtual key
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST 'http://0.0.0.0:4000/key/generate' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected Response
|
||||||
|
|
||||||
|
```bash
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"key": "sk-1234ewknldferwedojwojw"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/vllm/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234ewknldferwedojwojw' \
|
||||||
|
--data '{
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "I am going to Paris, what should I see?"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"max_tokens": 2048,
|
||||||
|
"temperature": 0.8,
|
||||||
|
"top_p": 0.1,
|
||||||
|
"model": "qwen2.5-7b-instruct",
|
||||||
|
}'
|
||||||
|
```
|
|
@ -1011,8 +1011,7 @@ Expected Response:

| Supported Operations | `/v1/responses`|
| Azure OpenAI Responses API | [Azure OpenAI Responses API ↗](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/responses?tabs=python-secure) |
| Cost Tracking, Logging Support | ✅ LiteLLM will log, track cost for Responses API Requests |
-| Supported OpenAI Params | ✅ All OpenAI params are supported, [See here](https://github.com/BerriAI/litellm/blob/0717369ae6969882d149933da48eeb8ab0e691bd/litellm/llms/openai/responses/transformation.py#L23) |

## Usage

@ -39,14 +39,164 @@ response = completion(
|
||||||
- temperature
|
- temperature
|
||||||
- top_p
|
- top_p
|
||||||
- max_tokens
|
- max_tokens
|
||||||
|
- max_completion_tokens
|
||||||
- stream
|
- stream
|
||||||
- tools
|
- tools
|
||||||
- tool_choice
|
- tool_choice
|
||||||
|
- functions
|
||||||
- response_format
|
- response_format
|
||||||
- n
|
- n
|
||||||
- stop
|
- stop
|
||||||
|
- logprobs
|
||||||
|
- frequency_penalty
|
||||||
|
- modalities
|
||||||
|
- reasoning_content
|
||||||
|
|
||||||
|
**Anthropic Params**
|
||||||
|
- thinking (used to set max budget tokens across anthropic/gemini models)
|
||||||
|
|
||||||
|
[**See Updated List**](https://github.com/BerriAI/litellm/blob/main/litellm/llms/gemini/chat/transformation.py#L70)
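As a quick illustration, here is a minimal SDK sketch passing a few of the params from the list above to a Gemini model (the model name and prompt are illustrative, and `GEMINI_API_KEY` is assumed to be set):

```python
from litellm import completion

# assumes GEMINI_API_KEY is set in the environment; model/prompt are illustrative
response = completion(
    model="gemini/gemini-2.0-flash",
    messages=[{"role": "user", "content": "Give me the capital of France as JSON."}],
    max_tokens=256,
    temperature=0.2,
    response_format={"type": "json_object"},  # `response_format` is in the supported list above
)
print(response.choices[0].message.content)
```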
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Usage - Thinking / `reasoning_content`
|
||||||
|
|
||||||
|
LiteLLM translates OpenAI's `reasoning_effort` to Gemini's `thinking` parameter. [Code](https://github.com/BerriAI/litellm/blob/620664921902d7a9bfb29897a7b27c1a7ef4ddfb/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py#L362)
|
||||||
|
|
||||||
|
**Mapping**
|
||||||
|
|
||||||
|
| reasoning_effort | thinking |
|
||||||
|
| ---------------- | -------- |
|
||||||
|
| "low" | "budget_tokens": 1024 |
|
||||||
|
| "medium" | "budget_tokens": 2048 |
|
||||||
|
| "high" | "budget_tokens": 4096 |
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
resp = completion(
|
||||||
|
model="gemini/gemini-2.5-flash-preview-04-17",
|
||||||
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
reasoning_effort="low",
|
||||||
|
)
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- model_name: gemini-2.5-flash
|
||||||
|
litellm_params:
|
||||||
|
model: gemini/gemini-2.5-flash-preview-04-17
|
||||||
|
api_key: os.environ/GEMINI_API_KEY
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer <YOUR-LITELLM-KEY>" \
|
||||||
|
-d '{
|
||||||
|
"model": "gemini-2.5-flash",
|
||||||
|
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
"reasoning_effort": "low"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
**Expected Response**
|
||||||
|
|
||||||
|
```python
|
||||||
|
ModelResponse(
|
||||||
|
id='chatcmpl-c542d76d-f675-4e87-8e5f-05855f5d0f5e',
|
||||||
|
created=1740470510,
|
||||||
|
model='claude-3-7-sonnet-20250219',
|
||||||
|
object='chat.completion',
|
||||||
|
system_fingerprint=None,
|
||||||
|
choices=[
|
||||||
|
Choices(
|
||||||
|
finish_reason='stop',
|
||||||
|
index=0,
|
||||||
|
message=Message(
|
||||||
|
content="The capital of France is Paris.",
|
||||||
|
role='assistant',
|
||||||
|
tool_calls=None,
|
||||||
|
function_call=None,
|
||||||
|
reasoning_content='The capital of France is Paris. This is a very straightforward factual question.'
|
||||||
|
),
|
||||||
|
)
|
||||||
|
],
|
||||||
|
usage=Usage(
|
||||||
|
completion_tokens=68,
|
||||||
|
prompt_tokens=42,
|
||||||
|
total_tokens=110,
|
||||||
|
completion_tokens_details=None,
|
||||||
|
prompt_tokens_details=PromptTokensDetailsWrapper(
|
||||||
|
audio_tokens=None,
|
||||||
|
cached_tokens=0,
|
||||||
|
text_tokens=None,
|
||||||
|
image_tokens=None
|
||||||
|
),
|
||||||
|
cache_creation_input_tokens=0,
|
||||||
|
cache_read_input_tokens=0
|
||||||
|
)
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pass `thinking` to Gemini models
|
||||||
|
|
||||||
|
You can also pass the `thinking` parameter to Gemini models.
|
||||||
|
|
||||||
|
This is translated to Gemini's [`thinkingConfig` parameter](https://ai.google.dev/gemini-api/docs/thinking#set-budget).
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
response = litellm.completion(
|
||||||
|
model="gemini/gemini-2.5-flash-preview-04-17",
|
||||||
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
thinking={"type": "enabled", "budget_tokens": 1024},
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer $LITELLM_KEY" \
|
||||||
|
-d '{
|
||||||
|
"model": "gemini/gemini-2.5-flash-preview-04-17",
|
||||||
|
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
"thinking": {"type": "enabled", "budget_tokens": 1024}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
[**See Updated List**](https://github.com/BerriAI/litellm/blob/1c747f3ad372399c5b95cc5696b06a5fbe53186b/litellm/llms/vertex_httpx.py#L122)
|
|
||||||
|
|
||||||
## Passing Gemini Specific Params
### Response schema
|
@ -163,6 +163,12 @@ os.environ["OPENAI_API_BASE"] = "openaiai-api-base" # OPTIONAL

| Model Name | Function Call |
|-----------------------|-----------------------------------------------------------------|
+| gpt-4.1 | `response = completion(model="gpt-4.1", messages=messages)` |
+| gpt-4.1-mini | `response = completion(model="gpt-4.1-mini", messages=messages)` |
+| gpt-4.1-nano | `response = completion(model="gpt-4.1-nano", messages=messages)` |
+| o4-mini | `response = completion(model="o4-mini", messages=messages)` |
+| o3-mini | `response = completion(model="o3-mini", messages=messages)` |
+| o3 | `response = completion(model="o3", messages=messages)` |
| o1-mini | `response = completion(model="o1-mini", messages=messages)` |
| o1-preview | `response = completion(model="o1-preview", messages=messages)` |
| gpt-4o-mini | `response = completion(model="gpt-4o-mini", messages=messages)` |

@ -542,6 +542,154 @@ print(resp)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### **Thinking / `reasoning_content`**
|
||||||
|
|
||||||
|
LiteLLM translates OpenAI's `reasoning_effort` to Gemini's `thinking` parameter. [Code](https://github.com/BerriAI/litellm/blob/620664921902d7a9bfb29897a7b27c1a7ef4ddfb/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py#L362)
|
||||||
|
|
||||||
|
**Mapping**
|
||||||
|
|
||||||
|
| reasoning_effort | thinking |
|
||||||
|
| ---------------- | -------- |
|
||||||
|
| "low" | "budget_tokens": 1024 |
|
||||||
|
| "medium" | "budget_tokens": 2048 |
|
||||||
|
| "high" | "budget_tokens": 4096 |
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
# !gcloud auth application-default login - run this to add vertex credentials to your env
|
||||||
|
|
||||||
|
resp = completion(
|
||||||
|
model="vertex_ai/gemini-2.5-flash-preview-04-17",
|
||||||
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
reasoning_effort="low",
|
||||||
|
vertex_project="project-id",
|
||||||
|
vertex_location="us-central1"
|
||||||
|
)
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- model_name: gemini-2.5-flash
|
||||||
|
litellm_params:
|
||||||
|
model: vertex_ai/gemini-2.5-flash-preview-04-17
|
||||||
|
vertex_credentials: {"project_id": "project-id", "location": "us-central1", "project_key": "project-key"}
|
||||||
|
vertex_project: "project-id"
|
||||||
|
vertex_location: "us-central1"
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer <YOUR-LITELLM-KEY>" \
|
||||||
|
-d '{
|
||||||
|
"model": "gemini-2.5-flash",
|
||||||
|
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
"reasoning_effort": "low"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
**Expected Response**
|
||||||
|
|
||||||
|
```python
|
||||||
|
ModelResponse(
|
||||||
|
id='chatcmpl-c542d76d-f675-4e87-8e5f-05855f5d0f5e',
|
||||||
|
created=1740470510,
|
||||||
|
model='claude-3-7-sonnet-20250219',
|
||||||
|
object='chat.completion',
|
||||||
|
system_fingerprint=None,
|
||||||
|
choices=[
|
||||||
|
Choices(
|
||||||
|
finish_reason='stop',
|
||||||
|
index=0,
|
||||||
|
message=Message(
|
||||||
|
content="The capital of France is Paris.",
|
||||||
|
role='assistant',
|
||||||
|
tool_calls=None,
|
||||||
|
function_call=None,
|
||||||
|
reasoning_content='The capital of France is Paris. This is a very straightforward factual question.'
|
||||||
|
),
|
||||||
|
)
|
||||||
|
],
|
||||||
|
usage=Usage(
|
||||||
|
completion_tokens=68,
|
||||||
|
prompt_tokens=42,
|
||||||
|
total_tokens=110,
|
||||||
|
completion_tokens_details=None,
|
||||||
|
prompt_tokens_details=PromptTokensDetailsWrapper(
|
||||||
|
audio_tokens=None,
|
||||||
|
cached_tokens=0,
|
||||||
|
text_tokens=None,
|
||||||
|
image_tokens=None
|
||||||
|
),
|
||||||
|
cache_creation_input_tokens=0,
|
||||||
|
cache_read_input_tokens=0
|
||||||
|
)
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Pass `thinking` to Gemini models
|
||||||
|
|
||||||
|
You can also pass the `thinking` parameter to Gemini models.
|
||||||
|
|
||||||
|
This is translated to Gemini's [`thinkingConfig` parameter](https://ai.google.dev/gemini-api/docs/thinking#set-budget).
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
# !gcloud auth application-default login - run this to add vertex credentials to your env
|
||||||
|
|
||||||
|
response = litellm.completion(
|
||||||
|
model="vertex_ai/gemini-2.5-flash-preview-04-17",
|
||||||
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
thinking={"type": "enabled", "budget_tokens": 1024},
|
||||||
|
vertex_project="project-id",
|
||||||
|
vertex_location="us-central1"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer $LITELLM_KEY" \
|
||||||
|
-d '{
|
||||||
|
"model": "vertex_ai/gemini-2.5-flash-preview-04-17",
|
||||||
|
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
"thinking": {"type": "enabled", "budget_tokens": 1024}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
### **Context Caching**

Vertex AI context caching is supported by calling the provider API directly. (Unified endpoint support coming soon.)

@ -161,6 +161,120 @@ curl -L -X POST 'http://0.0.0.0:4000/embeddings' \
|
||||||
|
|
||||||
Example Implementation from VLLM [here](https://github.com/vllm-project/vllm/pull/10020)
|
Example Implementation from VLLM [here](https://github.com/vllm-project/vllm/pull/10020)
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="files_message" label="(Unified) Files Message">
|
||||||
|
|
||||||
|
Use this to send a video url to VLLM + Gemini in the same format, using OpenAI's `files` message type.
|
||||||
|
|
||||||
|
There are two ways to send a video url to VLLM:
|
||||||
|
|
||||||
|
1. Pass the video url directly
|
||||||
|
|
||||||
|
```
|
||||||
|
{"type": "file", "file": {"file_id": video_url}},
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Pass the video data as base64
|
||||||
|
|
||||||
|
```
|
||||||
|
{"type": "file", "file": {"file_data": f"data:video/mp4;base64,{video_data_base64}"}}
|
||||||
|
```
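A minimal sketch of producing `video_data_base64` from a local file and sending it with the `files` message type (the file name is a placeholder, and `HOSTED_VLLM_API_BASE` is assumed to be set the same way as in the SDK example below):

```python
import base64
import os

from litellm import completion

# assumes a local video file and a reachable VLLM server; names are placeholders
os.environ["HOSTED_VLLM_API_BASE"] = "https://hosted-vllm-api.co"

with open("my_video.mp4", "rb") as f:
    video_data_base64 = base64.b64encode(f.read()).decode("utf-8")

response = completion(
    model="hosted_vllm/qwen",  # same model name as the SDK example below
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Summarize the following video"},
                {"type": "file", "file": {"file_data": f"data:video/mp4;base64,{video_data_base64}"}},
            ],
        }
    ],
)
print(response)
```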
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": "Summarize the following video"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "file",
|
||||||
|
"file": {
|
||||||
|
"file_id": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
# call vllm
|
||||||
|
os.environ["HOSTED_VLLM_API_BASE"] = "https://hosted-vllm-api.co"
|
||||||
|
os.environ["HOSTED_VLLM_API_KEY"] = "" # [optional], if your VLLM server requires an API key
|
||||||
|
response = completion(
|
||||||
|
model="hosted_vllm/qwen", # pass the vllm model name
|
||||||
|
messages=messages,
|
||||||
|
)
|
||||||
|
|
||||||
|
# call gemini
|
||||||
|
os.environ["GEMINI_API_KEY"] = "your-gemini-api-key"
|
||||||
|
response = completion(
|
||||||
|
model="gemini/gemini-1.5-flash", # pass the gemini model name
|
||||||
|
messages=messages,
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: my-model
|
||||||
|
litellm_params:
|
||||||
|
model: hosted_vllm/qwen # add hosted_vllm/ prefix to route as OpenAI provider
|
||||||
|
api_base: https://hosted-vllm-api.co # add api base for OpenAI compatible provider
|
||||||
|
- model_name: my-gemini-model
|
||||||
|
litellm_params:
|
||||||
|
model: gemini/gemini-1.5-flash # add gemini/ prefix to route as Google AI Studio provider
|
||||||
|
api_key: os.environ/GEMINI_API_KEY
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start the proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ litellm --config /path/to/config.yaml
|
||||||
|
|
||||||
|
# RUNNING on http://0.0.0.0:4000
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://0.0.0.0:4000/chat/completions \
|
||||||
|
-H "Authorization: Bearer sk-1234" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "my-model",
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content":
|
||||||
|
[
|
||||||
|
{"type": "text", "text": "Summarize the following video"},
|
||||||
|
{"type": "file", "file": {"file_id": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"}}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="video_url" label="(VLLM-specific) Video Message">
|
||||||
|
|
||||||
|
Use this to send a video url to VLLM in its native message format (`video_url`).
|
||||||
|
|
||||||
There are two ways to send a video url to VLLM:

1. Pass the video url directly

@ -249,6 +363,10 @@ curl -X POST http://0.0.0.0:4000/chat/completions \
|
||||||
</Tabs>

+</TabItem>
+</Tabs>

## (Deprecated) for `vllm pip package`
### Using - `litellm.completion`

docs/my-website/docs/proxy/model_discovery.md (new file, 108 lines)
@ -0,0 +1,108 @@
|
||||||
|
# Model Discovery
|
||||||
|
|
||||||
|
Use this to give users an accurate list of the models available behind a provider's endpoint when calling `/v1/models` for wildcard models.
|
||||||
|
|
||||||
|
## Supported Models
|
||||||
|
|
||||||
|
- Fireworks AI
|
||||||
|
- OpenAI
|
||||||
|
- Gemini
|
||||||
|
- LiteLLM Proxy
|
||||||
|
- Topaz
|
||||||
|
- Anthropic
|
||||||
|
- XAI
|
||||||
|
- VLLM
|
||||||
|
- Vertex AI
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
**1. Setup config.yaml**
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: xai/*
|
||||||
|
litellm_params:
|
||||||
|
model: xai/*
|
||||||
|
api_key: os.environ/XAI_API_KEY
|
||||||
|
|
||||||
|
litellm_settings:
|
||||||
|
check_provider_endpoint: true # 👈 Enable checking provider endpoint for wildcard models
|
||||||
|
```
|
||||||
|
|
||||||
|
**2. Start proxy**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
|
||||||
|
# RUNNING on http://0.0.0.0:4000
|
||||||
|
```
|
||||||
|
|
||||||
|
**3. Call `/v1/models`**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X GET "http://localhost:4000/v1/models" -H "Authorization: Bearer $LITELLM_KEY"
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected response
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"data": [
|
||||||
|
{
|
||||||
|
"id": "xai/grok-2-1212",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1677610602,
|
||||||
|
"owned_by": "openai"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "xai/grok-2-vision-1212",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1677610602,
|
||||||
|
"owned_by": "openai"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "xai/grok-3-beta",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1677610602,
|
||||||
|
"owned_by": "openai"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "xai/grok-3-fast-beta",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1677610602,
|
||||||
|
"owned_by": "openai"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "xai/grok-3-mini-beta",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1677610602,
|
||||||
|
"owned_by": "openai"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "xai/grok-3-mini-fast-beta",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1677610602,
|
||||||
|
"owned_by": "openai"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "xai/grok-beta",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1677610602,
|
||||||
|
"owned_by": "openai"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "xai/grok-vision-beta",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1677610602,
|
||||||
|
"owned_by": "openai"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "xai/grok-2-image-1212",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1677610602,
|
||||||
|
"owned_by": "openai"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"object": "list"
|
||||||
|
}
|
||||||
|
```
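The same list can also be fetched programmatically; a small sketch using the OpenAI Python SDK against the proxy (base URL and key are placeholders for your own setup):

```python
from openai import OpenAI

# placeholders: point these at your running proxy and a valid LiteLLM key
client = OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

for model in client.models.list().data:
    print(model.id)
```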
|
|
@ -16,6 +16,8 @@ Supported Providers:
- Vertex AI (Anthropic) (`vertexai/`)
- OpenRouter (`openrouter/`)
- XAI (`xai/`)
+- Google AI Studio (`google/`)
+- Vertex AI (`vertex_ai/`)

LiteLLM will standardize the `reasoning_content` in the response and `thinking_blocks` in the assistant message.

@ -23,7 +25,7 @@ LiteLLM will standardize the `reasoning_content` in the response and `thinking_b
    "message": {
        ...
        "reasoning_content": "The capital of France is Paris.",
-       "thinking_blocks": [
+       "thinking_blocks": [ # only returned for Anthropic models
            {
                "type": "thinking",
                "thinking": "The capital of France is Paris.",
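A short SDK sketch (the model name is illustrative, and any provider from the list above works) showing how these standardized fields come back:

```python
from litellm import completion

# illustrative model; assumes the matching provider API key is set in the environment
resp = completion(
    model="anthropic/claude-3-7-sonnet-20250219",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    reasoning_effort="low",
)

message = resp.choices[0].message
print(message.reasoning_content)                  # standardized across providers
print(getattr(message, "thinking_blocks", None))  # only returned for Anthropic models
```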
@ -14,22 +14,22 @@ LiteLLM provides a BETA endpoint in the spec of [OpenAI's `/responses` API](http
|
||||||
| Fallbacks | ✅ | Works between supported models |
|
| Fallbacks | ✅ | Works between supported models |
|
||||||
| Loadbalancing | ✅ | Works between supported models |
|
| Loadbalancing | ✅ | Works between supported models |
|
||||||
| Supported LiteLLM Versions | 1.63.8+ | |
|
| Supported LiteLLM Versions | 1.63.8+ | |
|
||||||
| Supported LLM providers | `openai` | |
|
| Supported LLM providers | **All LiteLLM supported providers** | `openai`, `anthropic`, `bedrock`, `vertex_ai`, `gemini`, `azure`, `azure_ai` etc. |
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
## Create a model response
|
### LiteLLM Python SDK
|
||||||
|
|
||||||
<Tabs>
|
<Tabs>
|
||||||
<TabItem value="litellm-sdk" label="LiteLLM SDK">
|
<TabItem value="openai" label="OpenAI">
|
||||||
|
|
||||||
#### Non-streaming
|
#### Non-streaming
|
||||||
```python showLineNumbers
|
```python showLineNumbers title="OpenAI Non-streaming Response"
|
||||||
import litellm
|
import litellm
|
||||||
|
|
||||||
# Non-streaming response
|
# Non-streaming response
|
||||||
response = litellm.responses(
|
response = litellm.responses(
|
||||||
model="o1-pro",
|
model="openai/o1-pro",
|
||||||
input="Tell me a three sentence bedtime story about a unicorn.",
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
max_output_tokens=100
|
max_output_tokens=100
|
||||||
)
|
)
|
||||||
|
@ -38,12 +38,12 @@ print(response)
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Streaming
|
#### Streaming
|
||||||
```python showLineNumbers
|
```python showLineNumbers title="OpenAI Streaming Response"
|
||||||
import litellm
|
import litellm
|
||||||
|
|
||||||
# Streaming response
|
# Streaming response
|
||||||
response = litellm.responses(
|
response = litellm.responses(
|
||||||
model="o1-pro",
|
model="openai/o1-pro",
|
||||||
input="Tell me a three sentence bedtime story about a unicorn.",
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
stream=True
|
stream=True
|
||||||
)
|
)
|
||||||
|
@ -53,58 +53,169 @@ for event in response:
|
||||||
```
|
```
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
<TabItem value="proxy" label="OpenAI SDK with LiteLLM Proxy">
|
|
||||||
|
|
||||||
First, add this to your litellm proxy config.yaml:
|
<TabItem value="anthropic" label="Anthropic">
|
||||||
```yaml showLineNumbers
|
|
||||||
model_list:
|
|
||||||
- model_name: o1-pro
|
|
||||||
litellm_params:
|
|
||||||
model: openai/o1-pro
|
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
|
||||||
```
|
|
||||||
|
|
||||||
Start your LiteLLM proxy:
|
|
||||||
```bash
|
|
||||||
litellm --config /path/to/config.yaml
|
|
||||||
|
|
||||||
# RUNNING on http://0.0.0.0:4000
|
|
||||||
```
|
|
||||||
|
|
||||||
Then use the OpenAI SDK pointed to your proxy:
|
|
||||||
|
|
||||||
#### Non-streaming
|
#### Non-streaming
|
||||||
```python showLineNumbers
|
```python showLineNumbers title="Anthropic Non-streaming Response"
|
||||||
from openai import OpenAI
|
import litellm
|
||||||
|
import os
|
||||||
|
|
||||||
# Initialize client with your proxy URL
|
# Set API key
|
||||||
client = OpenAI(
|
os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-api-key"
|
||||||
base_url="http://localhost:4000", # Your proxy URL
|
|
||||||
api_key="your-api-key" # Your proxy API key
|
|
||||||
)
|
|
||||||
|
|
||||||
# Non-streaming response
|
# Non-streaming response
|
||||||
response = client.responses.create(
|
response = litellm.responses(
|
||||||
model="o1-pro",
|
model="anthropic/claude-3-5-sonnet-20240620",
|
||||||
input="Tell me a three sentence bedtime story about a unicorn."
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
|
max_output_tokens=100
|
||||||
)
|
)
|
||||||
|
|
||||||
print(response)
|
print(response)
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Streaming
|
#### Streaming
|
||||||
```python showLineNumbers
|
```python showLineNumbers title="Anthropic Streaming Response"
|
||||||
from openai import OpenAI
|
import litellm
|
||||||
|
import os
|
||||||
|
|
||||||
# Initialize client with your proxy URL
|
# Set API key
|
||||||
client = OpenAI(
|
os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-api-key"
|
||||||
base_url="http://localhost:4000", # Your proxy URL
|
|
||||||
api_key="your-api-key" # Your proxy API key
|
|
||||||
)
|
|
||||||
|
|
||||||
# Streaming response
|
# Streaming response
|
||||||
response = client.responses.create(
|
response = litellm.responses(
|
||||||
model="o1-pro",
|
model="anthropic/claude-3-5-sonnet-20240620",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
|
stream=True
|
||||||
|
)
|
||||||
|
|
||||||
|
for event in response:
|
||||||
|
print(event)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="vertex" label="Vertex AI">
|
||||||
|
|
||||||
|
#### Non-streaming
|
||||||
|
```python showLineNumbers title="Vertex AI Non-streaming Response"
|
||||||
|
import litellm
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Set credentials - Vertex AI uses application default credentials
|
||||||
|
# Run 'gcloud auth application-default login' to authenticate
|
||||||
|
os.environ["VERTEXAI_PROJECT"] = "your-gcp-project-id"
|
||||||
|
os.environ["VERTEXAI_LOCATION"] = "us-central1"
|
||||||
|
|
||||||
|
# Non-streaming response
|
||||||
|
response = litellm.responses(
|
||||||
|
model="vertex_ai/gemini-1.5-pro",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
|
max_output_tokens=100
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Streaming
|
||||||
|
```python showLineNumbers title="Vertex AI Streaming Response"
|
||||||
|
import litellm
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Set credentials - Vertex AI uses application default credentials
|
||||||
|
# Run 'gcloud auth application-default login' to authenticate
|
||||||
|
os.environ["VERTEXAI_PROJECT"] = "your-gcp-project-id"
|
||||||
|
os.environ["VERTEXAI_LOCATION"] = "us-central1"
|
||||||
|
|
||||||
|
# Streaming response
|
||||||
|
response = litellm.responses(
|
||||||
|
model="vertex_ai/gemini-1.5-pro",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
|
stream=True
|
||||||
|
)
|
||||||
|
|
||||||
|
for event in response:
|
||||||
|
print(event)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="bedrock" label="AWS Bedrock">
|
||||||
|
|
||||||
|
#### Non-streaming
|
||||||
|
```python showLineNumbers title="AWS Bedrock Non-streaming Response"
|
||||||
|
import litellm
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Set AWS credentials
|
||||||
|
os.environ["AWS_ACCESS_KEY_ID"] = "your-access-key-id"
|
||||||
|
os.environ["AWS_SECRET_ACCESS_KEY"] = "your-secret-access-key"
|
||||||
|
os.environ["AWS_REGION_NAME"] = "us-west-2" # or your AWS region
|
||||||
|
|
||||||
|
# Non-streaming response
|
||||||
|
response = litellm.responses(
|
||||||
|
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
|
max_output_tokens=100
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Streaming
|
||||||
|
```python showLineNumbers title="AWS Bedrock Streaming Response"
|
||||||
|
import litellm
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Set AWS credentials
|
||||||
|
os.environ["AWS_ACCESS_KEY_ID"] = "your-access-key-id"
|
||||||
|
os.environ["AWS_SECRET_ACCESS_KEY"] = "your-secret-access-key"
|
||||||
|
os.environ["AWS_REGION_NAME"] = "us-west-2" # or your AWS region
|
||||||
|
|
||||||
|
# Streaming response
|
||||||
|
response = litellm.responses(
|
||||||
|
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
|
stream=True
|
||||||
|
)
|
||||||
|
|
||||||
|
for event in response:
|
||||||
|
print(event)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="gemini" label="Google AI Studio">
|
||||||
|
|
||||||
|
#### Non-streaming
|
||||||
|
```python showLineNumbers title="Google AI Studio Non-streaming Response"
|
||||||
|
import litellm
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Set API key for Google AI Studio
|
||||||
|
os.environ["GEMINI_API_KEY"] = "your-gemini-api-key"
|
||||||
|
|
||||||
|
# Non-streaming response
|
||||||
|
response = litellm.responses(
|
||||||
|
model="gemini/gemini-1.5-flash",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
|
max_output_tokens=100
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Streaming
|
||||||
|
```python showLineNumbers title="Google AI Studio Streaming Response"
|
||||||
|
import litellm
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Set API key for Google AI Studio
|
||||||
|
os.environ["GEMINI_API_KEY"] = "your-gemini-api-key"
|
||||||
|
|
||||||
|
# Streaming response
|
||||||
|
response = litellm.responses(
|
||||||
|
model="gemini/gemini-1.5-flash",
|
||||||
input="Tell me a three sentence bedtime story about a unicorn.",
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
stream=True
|
stream=True
|
||||||
)
|
)
|
||||||
|
@ -116,10 +227,296 @@ for event in response:
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
### LiteLLM Proxy with OpenAI SDK
|
||||||
|
|
||||||
## **Supported Providers**
|
First, set up and start your LiteLLM proxy server.
|
||||||
|
|
||||||
|
```bash title="Start LiteLLM Proxy Server"
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
|
||||||
|
# RUNNING on http://0.0.0.0:4000
|
||||||
|
```
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="openai" label="OpenAI">
|
||||||
|
|
||||||
|
First, add this to your litellm proxy config.yaml:
|
||||||
|
```yaml showLineNumbers title="OpenAI Proxy Configuration"
|
||||||
|
model_list:
|
||||||
|
- model_name: openai/o1-pro
|
||||||
|
litellm_params:
|
||||||
|
model: openai/o1-pro
|
||||||
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Non-streaming
|
||||||
|
```python showLineNumbers title="OpenAI Proxy Non-streaming Response"
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# Initialize client with your proxy URL
|
||||||
|
client = OpenAI(
|
||||||
|
base_url="http://localhost:4000", # Your proxy URL
|
||||||
|
api_key="your-api-key" # Your proxy API key
|
||||||
|
)
|
||||||
|
|
||||||
|
# Non-streaming response
|
||||||
|
response = client.responses.create(
|
||||||
|
model="openai/o1-pro",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn."
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Streaming
|
||||||
|
```python showLineNumbers title="OpenAI Proxy Streaming Response"
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# Initialize client with your proxy URL
|
||||||
|
client = OpenAI(
|
||||||
|
base_url="http://localhost:4000", # Your proxy URL
|
||||||
|
api_key="your-api-key" # Your proxy API key
|
||||||
|
)
|
||||||
|
|
||||||
|
# Streaming response
|
||||||
|
response = client.responses.create(
|
||||||
|
model="openai/o1-pro",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
|
stream=True
|
||||||
|
)
|
||||||
|
|
||||||
|
for event in response:
|
||||||
|
print(event)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="anthropic" label="Anthropic">
|
||||||
|
|
||||||
|
First, add this to your litellm proxy config.yaml:
|
||||||
|
```yaml showLineNumbers title="Anthropic Proxy Configuration"
|
||||||
|
model_list:
|
||||||
|
- model_name: anthropic/claude-3-5-sonnet-20240620
|
||||||
|
litellm_params:
|
||||||
|
model: anthropic/claude-3-5-sonnet-20240620
|
||||||
|
api_key: os.environ/ANTHROPIC_API_KEY
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Non-streaming
|
||||||
|
```python showLineNumbers title="Anthropic Proxy Non-streaming Response"
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# Initialize client with your proxy URL
|
||||||
|
client = OpenAI(
|
||||||
|
base_url="http://localhost:4000", # Your proxy URL
|
||||||
|
api_key="your-api-key" # Your proxy API key
|
||||||
|
)
|
||||||
|
|
||||||
|
# Non-streaming response
|
||||||
|
response = client.responses.create(
|
||||||
|
model="anthropic/claude-3-5-sonnet-20240620",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn."
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Streaming
|
||||||
|
```python showLineNumbers title="Anthropic Proxy Streaming Response"
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# Initialize client with your proxy URL
|
||||||
|
client = OpenAI(
|
||||||
|
base_url="http://localhost:4000", # Your proxy URL
|
||||||
|
api_key="your-api-key" # Your proxy API key
|
||||||
|
)
|
||||||
|
|
||||||
|
# Streaming response
|
||||||
|
response = client.responses.create(
|
||||||
|
model="anthropic/claude-3-5-sonnet-20240620",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
|
stream=True
|
||||||
|
)
|
||||||
|
|
||||||
|
for event in response:
|
||||||
|
print(event)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="vertex" label="Vertex AI">
|
||||||
|
|
||||||
|
First, add this to your litellm proxy config.yaml:
|
||||||
|
```yaml showLineNumbers title="Vertex AI Proxy Configuration"
|
||||||
|
model_list:
|
||||||
|
- model_name: vertex_ai/gemini-1.5-pro
|
||||||
|
litellm_params:
|
||||||
|
model: vertex_ai/gemini-1.5-pro
|
||||||
|
vertex_project: your-gcp-project-id
|
||||||
|
vertex_location: us-central1
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Non-streaming
|
||||||
|
```python showLineNumbers title="Vertex AI Proxy Non-streaming Response"
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# Initialize client with your proxy URL
|
||||||
|
client = OpenAI(
|
||||||
|
base_url="http://localhost:4000", # Your proxy URL
|
||||||
|
api_key="your-api-key" # Your proxy API key
|
||||||
|
)
|
||||||
|
|
||||||
|
# Non-streaming response
|
||||||
|
response = client.responses.create(
|
||||||
|
model="vertex_ai/gemini-1.5-pro",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn."
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Streaming
|
||||||
|
```python showLineNumbers title="Vertex AI Proxy Streaming Response"
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# Initialize client with your proxy URL
|
||||||
|
client = OpenAI(
|
||||||
|
base_url="http://localhost:4000", # Your proxy URL
|
||||||
|
api_key="your-api-key" # Your proxy API key
|
||||||
|
)
|
||||||
|
|
||||||
|
# Streaming response
|
||||||
|
response = client.responses.create(
|
||||||
|
model="vertex_ai/gemini-1.5-pro",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
|
stream=True
|
||||||
|
)
|
||||||
|
|
||||||
|
for event in response:
|
||||||
|
print(event)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="bedrock" label="AWS Bedrock">
|
||||||
|
|
||||||
|
First, add this to your litellm proxy config.yaml:
|
||||||
|
```yaml showLineNumbers title="AWS Bedrock Proxy Configuration"
|
||||||
|
model_list:
|
||||||
|
- model_name: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
|
||||||
|
litellm_params:
|
||||||
|
model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
|
||||||
|
aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
|
||||||
|
aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
|
||||||
|
aws_region_name: us-west-2
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Non-streaming
|
||||||
|
```python showLineNumbers title="AWS Bedrock Proxy Non-streaming Response"
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# Initialize client with your proxy URL
|
||||||
|
client = OpenAI(
|
||||||
|
base_url="http://localhost:4000", # Your proxy URL
|
||||||
|
api_key="your-api-key" # Your proxy API key
|
||||||
|
)
|
||||||
|
|
||||||
|
# Non-streaming response
|
||||||
|
response = client.responses.create(
|
||||||
|
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn."
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Streaming
|
||||||
|
```python showLineNumbers title="AWS Bedrock Proxy Streaming Response"
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# Initialize client with your proxy URL
|
||||||
|
client = OpenAI(
|
||||||
|
base_url="http://localhost:4000", # Your proxy URL
|
||||||
|
api_key="your-api-key" # Your proxy API key
|
||||||
|
)
|
||||||
|
|
||||||
|
# Streaming response
|
||||||
|
response = client.responses.create(
|
||||||
|
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
|
stream=True
|
||||||
|
)
|
||||||
|
|
||||||
|
for event in response:
|
||||||
|
print(event)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="gemini" label="Google AI Studio">
|
||||||
|
|
||||||
|
First, add this to your litellm proxy config.yaml:
|
||||||
|
```yaml showLineNumbers title="Google AI Studio Proxy Configuration"
|
||||||
|
model_list:
|
||||||
|
- model_name: gemini/gemini-1.5-flash
|
||||||
|
litellm_params:
|
||||||
|
model: gemini/gemini-1.5-flash
|
||||||
|
api_key: os.environ/GEMINI_API_KEY
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Non-streaming
|
||||||
|
```python showLineNumbers title="Google AI Studio Proxy Non-streaming Response"
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# Initialize client with your proxy URL
|
||||||
|
client = OpenAI(
|
||||||
|
base_url="http://localhost:4000", # Your proxy URL
|
||||||
|
api_key="your-api-key" # Your proxy API key
|
||||||
|
)
|
||||||
|
|
||||||
|
# Non-streaming response
|
||||||
|
response = client.responses.create(
|
||||||
|
model="gemini/gemini-1.5-flash",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn."
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Streaming
|
||||||
|
```python showLineNumbers title="Google AI Studio Proxy Streaming Response"
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# Initialize client with your proxy URL
|
||||||
|
client = OpenAI(
|
||||||
|
base_url="http://localhost:4000", # Your proxy URL
|
||||||
|
api_key="your-api-key" # Your proxy API key
|
||||||
|
)
|
||||||
|
|
||||||
|
# Streaming response
|
||||||
|
response = client.responses.create(
|
||||||
|
model="gemini/gemini-1.5-flash",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
|
stream=True
|
||||||
|
)
|
||||||
|
|
||||||
|
for event in response:
|
||||||
|
print(event)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
## Supported Responses API Parameters
|
||||||
|
|
||||||
|
| Provider | Supported Parameters |
|----------|---------------------|
| `openai` | [All Responses API parameters are supported](https://github.com/BerriAI/litellm/blob/7c3df984da8e4dff9201e4c5353fdc7a2b441831/litellm/llms/openai/responses/transformation.py#L23) |
| `azure` | [All Responses API parameters are supported](https://github.com/BerriAI/litellm/blob/7c3df984da8e4dff9201e4c5353fdc7a2b441831/litellm/llms/openai/responses/transformation.py#L23) |
| `anthropic` | [See supported parameters here](https://github.com/BerriAI/litellm/blob/f39d9178868662746f159d5ef642c7f34f9bfe5f/litellm/responses/litellm_completion_transformation/transformation.py#L57) |
| `bedrock` | [See supported parameters here](https://github.com/BerriAI/litellm/blob/f39d9178868662746f159d5ef642c7f34f9bfe5f/litellm/responses/litellm_completion_transformation/transformation.py#L57) |
| `gemini` | [See supported parameters here](https://github.com/BerriAI/litellm/blob/f39d9178868662746f159d5ef642c7f34f9bfe5f/litellm/responses/litellm_completion_transformation/transformation.py#L57) |
| `vertex_ai` | [See supported parameters here](https://github.com/BerriAI/litellm/blob/f39d9178868662746f159d5ef642c7f34f9bfe5f/litellm/responses/litellm_completion_transformation/transformation.py#L57) |
| `azure_ai` | [See supported parameters here](https://github.com/BerriAI/litellm/blob/f39d9178868662746f159d5ef642c7f34f9bfe5f/litellm/responses/litellm_completion_transformation/transformation.py#L57) |
| All other llm api providers | [See supported parameters here](https://github.com/BerriAI/litellm/blob/f39d9178868662746f159d5ef642c7f34f9bfe5f/litellm/responses/litellm_completion_transformation/transformation.py#L57) |
|
||||||
|
|
||||||
| Provider | Link to Usage |
|-------------|--------------------|
| OpenAI | [Usage](#usage) |
| Azure OpenAI | [Usage](../docs/providers/azure#responses-api) |
|
|
146
docs/my-website/docs/tutorials/openai_codex.md
Normal file
|
@ -0,0 +1,146 @@
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
|
# Using LiteLLM with OpenAI Codex
|
||||||
|
|
||||||
|
This guide walks you through connecting OpenAI Codex to LiteLLM. Using LiteLLM with Codex allows teams to:
|
||||||
|
- Access 100+ LLMs through the Codex interface
|
||||||
|
- Use powerful models like Gemini through a familiar interface
|
||||||
|
- Track spend and usage with LiteLLM's built-in analytics
|
||||||
|
- Control model access with virtual keys
|
||||||
|
|
||||||
|
<Image img={require('../../img/litellm_codex.gif')} />
|
||||||
|
|
||||||
|
## Quickstart
|
||||||
|
|
||||||
|
:::info
|
||||||
|
|
||||||
|
Requires LiteLLM v1.66.3.dev5 or higher
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
|
||||||
|
Make sure to set up LiteLLM with the [LiteLLM Getting Started Guide](../proxy/docker_quick_start.md).
|
||||||
|
|
||||||
|
## 1. Install OpenAI Codex
|
||||||
|
|
||||||
|
Install the OpenAI Codex CLI tool globally using npm:
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="npm" label="npm">
|
||||||
|
|
||||||
|
```bash showLineNumbers
|
||||||
|
npm i -g @openai/codex
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="yarn" label="yarn">
|
||||||
|
|
||||||
|
```bash showLineNumbers
|
||||||
|
yarn global add @openai/codex
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
## 2. Start LiteLLM Proxy
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="docker" label="Docker">
|
||||||
|
|
||||||
|
```bash showLineNumbers
|
||||||
|
docker run \
|
||||||
|
-v $(pwd)/litellm_config.yaml:/app/config.yaml \
|
||||||
|
-p 4000:4000 \
|
||||||
|
ghcr.io/berriai/litellm:main-latest \
|
||||||
|
--config /app/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="pip" label="LiteLLM CLI">
|
||||||
|
|
||||||
|
```bash showLineNumbers
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
LiteLLM should now be running on [http://localhost:4000](http://localhost:4000)
|
||||||
|
|
||||||
|
## 3. Configure LiteLLM for Model Routing
|
||||||
|
|
||||||
|
Ensure your LiteLLM Proxy is properly configured to route to your desired models. Create a `litellm_config.yaml` file with the following content:
|
||||||
|
|
||||||
|
```yaml showLineNumbers
|
||||||
|
model_list:
|
||||||
|
- model_name: o3-mini
|
||||||
|
litellm_params:
|
||||||
|
model: openai/o3-mini
|
||||||
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
- model_name: claude-3-7-sonnet-latest
|
||||||
|
litellm_params:
|
||||||
|
model: anthropic/claude-3-7-sonnet-latest
|
||||||
|
api_key: os.environ/ANTHROPIC_API_KEY
|
||||||
|
- model_name: gemini-2.0-flash
|
||||||
|
litellm_params:
|
||||||
|
model: gemini/gemini-2.0-flash
|
||||||
|
api_key: os.environ/GEMINI_API_KEY
|
||||||
|
|
||||||
|
litellm_settings:
|
||||||
|
drop_params: true
|
||||||
|
```
|
||||||
|
|
||||||
|
This configuration enables routing to specific OpenAI, Anthropic, and Gemini models with explicit names.
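To confirm the proxy picked up these model names, you can list the models it exposes before pointing Codex at it. A minimal sketch, assuming the proxy is running locally on port 4000 with the virtual key `sk-1234`:

```python
# Minimal sketch: list the models the proxy exposes to verify the config above.
# The proxy URL and virtual key are assumptions for this example.
import requests

resp = requests.get(
    "http://localhost:4000/v1/models",
    headers={"Authorization": "Bearer sk-1234"},
    timeout=10,
)
resp.raise_for_status()
print([m["id"] for m in resp.json()["data"]])
# Expected to include: o3-mini, claude-3-7-sonnet-latest, gemini-2.0-flash
```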
|
||||||
|
|
||||||
|
## 4. Configure Codex to Use LiteLLM Proxy
|
||||||
|
|
||||||
|
Set the required environment variables to point Codex to your LiteLLM Proxy:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Point to your LiteLLM Proxy server
|
||||||
|
export OPENAI_BASE_URL=http://0.0.0.0:4000
|
||||||
|
|
||||||
|
# Use your LiteLLM API key (if you've set up authentication)
|
||||||
|
export OPENAI_API_KEY="sk-1234"
|
||||||
|
```
|
||||||
|
|
||||||
|
## 5. Run Codex with Gemini
|
||||||
|
|
||||||
|
With everything configured, you can now run Codex with Gemini:
|
||||||
|
|
||||||
|
```bash showLineNumbers
|
||||||
|
codex --model gemini-2.0-flash --full-auto
|
||||||
|
```
|
||||||
|
|
||||||
|
<Image img={require('../../img/litellm_codex.gif')} />
|
||||||
|
|
||||||
|
The `--full-auto` flag allows Codex to automatically generate code without additional prompting.
|
||||||
|
|
||||||
|
## 6. Advanced Options
|
||||||
|
|
||||||
|
### Using Different Models
|
||||||
|
|
||||||
|
You can use any model configured in your LiteLLM proxy:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Use Claude models
|
||||||
|
codex --model claude-3-7-sonnet-latest
|
||||||
|
|
||||||
|
# Use Google AI Studio Gemini models
|
||||||
|
codex --model gemini-2.0-flash
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
- If you encounter connection issues, ensure your LiteLLM Proxy is running and accessible at the specified URL
|
||||||
|
- Verify your LiteLLM API key is valid if you're using authentication
|
||||||
|
- Check that your model routing configuration is correct
|
||||||
|
- For model-specific errors, ensure the model is properly configured in your LiteLLM setup
|
||||||
|
|
||||||
|
## Additional Resources
|
||||||
|
|
||||||
|
- [LiteLLM Docker Quick Start Guide](../proxy/docker_quick_start.md)
|
||||||
|
- [OpenAI Codex GitHub Repository](https://github.com/openai/codex)
|
||||||
|
- [LiteLLM Virtual Keys and Authentication](../proxy/virtual_keys.md)
|
74
docs/my-website/docs/tutorials/scim_litellm.md
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
|
||||||
|
# SCIM with LiteLLM
|
||||||
|
|
||||||
|
Enables identity providers (Okta, Azure AD, OneLogin, etc.) to automate user and team (group) provisioning, updates, and deprovisioning on LiteLLM.
|
||||||
|
|
||||||
|
|
||||||
|
This tutorial will walk you through the steps to connect your IDP to LiteLLM SCIM Endpoints.
|
||||||
|
|
||||||
|
### Supported SSO Providers for SCIM
|
||||||
|
Below is a list of supported SSO providers for connecting to LiteLLM SCIM Endpoints.
|
||||||
|
- Microsoft Entra ID (Azure AD)
|
||||||
|
- Okta
|
||||||
|
- Google Workspace
|
||||||
|
- OneLogin
|
||||||
|
- Keycloak
|
||||||
|
- Auth0
|
||||||
|
|
||||||
|
|
||||||
|
## 1. Get your SCIM Tenant URL and Bearer Token
|
||||||
|
|
||||||
|
On LiteLLM, navigate to Settings > Admin Settings > SCIM. On this page, create a SCIM Token; this allows your IDP to authenticate to the LiteLLM `/scim` endpoints.
|
||||||
|
|
||||||
|
<Image img={require('../../img/scim_2.png')} style={{ width: '800px', height: 'auto' }} />
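Before wiring up your IDP, you can sanity-check the token yourself. A minimal sketch, assuming the SCIM base path is `/scim/v2` on your proxy URL and `SCIM_TOKEN` holds the token you just created:

```python
# Minimal sketch: verify the SCIM token against the proxy's SCIM Users endpoint.
# The /scim/v2 base path and the env var names are assumptions for this example.
import os
import requests

base_url = os.environ.get("LITELLM_PROXY_URL", "http://localhost:4000")
token = os.environ["SCIM_TOKEN"]

resp = requests.get(
    f"{base_url}/scim/v2/Users",
    headers={"Authorization": f"Bearer {token}"},
    timeout=10,
)
print(resp.status_code)  # 200 means your IDP will be able to authenticate too
```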
|
||||||
|
|
||||||
|
## 2. Connect your IDP to LiteLLM SCIM Endpoints
|
||||||
|
|
||||||
|
On your IDP, navigate to your SSO application and select `Provisioning` > `New provisioning configuration`.
|
||||||
|
|
||||||
|
On this page, paste in your LiteLLM SCIM Tenant URL and Bearer Token.
|
||||||
|
|
||||||
|
Once this is pasted in, click on `Test Connection` to ensure your IDP can authenticate to the LiteLLM SCIM endpoints.
|
||||||
|
|
||||||
|
<Image img={require('../../img/scim_4.png')} style={{ width: '800px', height: 'auto' }} />
|
||||||
|
|
||||||
|
|
||||||
|
## 3. Test SCIM Connection
|
||||||
|
|
||||||
|
### 3.1 Assign the group to your LiteLLM Enterprise App
|
||||||
|
|
||||||
|
On your IDP portal, navigate to `Enterprise Applications` and select your LiteLLM app.
|
||||||
|
|
||||||
|
<Image img={require('../../img/msft_enterprise_app.png')} style={{ width: '800px', height: 'auto' }} />
|
||||||
|
|
||||||
|
<br />
|
||||||
|
<br />
|
||||||
|
|
||||||
|
Once you've selected your LiteLLM app, click on `Users and Groups` > `Add user/group`.
|
||||||
|
|
||||||
|
<Image img={require('../../img/msft_enterprise_assign_group.png')} style={{ width: '800px', height: 'auto' }} />
|
||||||
|
|
||||||
|
<br />
|
||||||
|
|
||||||
|
Now select the group you created in step 1.1 and add it to the LiteLLM Enterprise App. At this point, `Production LLM Evals Group` has been added to the LiteLLM Enterprise App. The next step is having LiteLLM automatically create `Production LLM Evals Group` in the LiteLLM DB when a new user signs in.
|
||||||
|
|
||||||
|
<Image img={require('../../img/msft_enterprise_select_group.png')} style={{ width: '800px', height: 'auto' }} />
|
||||||
|
|
||||||
|
|
||||||
|
### 3.2 Sign in to LiteLLM UI via SSO
|
||||||
|
|
||||||
|
Sign in to the LiteLLM UI via SSO. You should be redirected to the Entra ID SSO page. This SSO sign-in flow will trigger LiteLLM to fetch the latest groups and members from Azure Entra ID.
|
||||||
|
|
||||||
|
<Image img={require('../../img/msft_sso_sign_in.png')} style={{ width: '800px', height: 'auto' }} />
|
||||||
|
|
||||||
|
### 3.3 Check the new team on LiteLLM UI
|
||||||
|
|
||||||
|
On the LiteLLM UI, navigate to `Teams`. You should see the new team `Production LLM Evals Group` auto-created on LiteLLM.
|
||||||
|
|
||||||
|
<Image img={require('../../img/msft_auto_team.png')} style={{ width: '900px', height: 'auto' }} />
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
BIN
docs/my-website/img/litellm_codex.gif
Normal file
After Width: | Height: | Size: 12 MiB |
BIN
docs/my-website/img/release_notes/new_tag_usage.png
Normal file
After Width: | Height: | Size: 207 KiB |
BIN
docs/my-website/img/release_notes/new_team_usage.png
Normal file
After Width: | Height: | Size: 268 KiB |
BIN
docs/my-website/img/release_notes/new_team_usage_highlight.jpg
Normal file
After Width: | Height: | Size: 999 KiB |
BIN
docs/my-website/img/release_notes/unified_responses_api_rn.png
Normal file
After Width: | Height: | Size: 244 KiB |
BIN
docs/my-website/img/scim_0.png
Normal file
After Width: | Height: | Size: 380 KiB |
BIN
docs/my-website/img/scim_1.png
Normal file
After Width: | Height: | Size: 231 KiB |
BIN
docs/my-website/img/scim_2.png
Normal file
After Width: | Height: | Size: 261 KiB |
BIN
docs/my-website/img/scim_3.png
Normal file
After Width: | Height: | Size: 413 KiB |
BIN
docs/my-website/img/scim_4.png
Normal file
After Width: | Height: | Size: 274 KiB |
BIN
docs/my-website/img/scim_integration.png
Normal file
After Width: | Height: | Size: 31 KiB |
153
docs/my-website/release_notes/v1.67.0-stable/index.md
Normal file
|
@ -0,0 +1,153 @@
|
||||||
|
---
|
||||||
|
title: v1.67.0-stable - SCIM Integration
|
||||||
|
slug: v1.67.0-stable
|
||||||
|
date: 2025-04-19T10:00:00
|
||||||
|
authors:
|
||||||
|
- name: Krrish Dholakia
|
||||||
|
title: CEO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
|
- name: Ishaan Jaffer
|
||||||
|
title: CTO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
|
||||||
|
|
||||||
|
tags: ["sso", "unified_file_id", "cost_tracking", "security"]
|
||||||
|
hide_table_of_contents: false
|
||||||
|
---
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
|
## Key Highlights
|
||||||
|
|
||||||
|
- **SCIM Integration**: Enables identity providers (Okta, Azure AD, OneLogin, etc.) to automate user and team (group) provisioning, updates, and deprovisioning
|
||||||
|
- **Team and Tag based usage tracking**: You can now see usage and spend by team and tag at 1M+ spend logs.
|
||||||
|
- **Unified Responses API**: Support for calling Anthropic, Gemini, Groq, etc. via OpenAI's new Responses API.
|
||||||
|
|
||||||
|
Let's dive in.
|
||||||
|
|
||||||
|
## SCIM Integration
|
||||||
|
|
||||||
|
<Image img={require('../../img/scim_integration.png')}/>
|
||||||
|
|
||||||
|
This release adds SCIM support to LiteLLM. This allows your SSO provider (Okta, Azure AD, etc) to automatically create/delete users, teams, and memberships on LiteLLM. This means that when you remove a team on your SSO provider, your SSO provider will automatically delete the corresponding team on LiteLLM.
|
||||||
|
|
||||||
|
[Read more](../../docs/tutorials/scim_litellm)
|
||||||
|
## Team and Tag based usage tracking
|
||||||
|
|
||||||
|
<Image img={require('../../img/release_notes/new_team_usage_highlight.jpg')}/>
|
||||||
|
|
||||||
|
|
||||||
|
This release improves team and tag based usage tracking at 1M+ spend logs, making it easy to monitor your LLM API spend in production. This covers:
|
||||||
|
|
||||||
|
- View **daily spend** by teams + tags
|
||||||
|
- View **usage / spend by key**, within teams
|
||||||
|
- View **spend by multiple tags**
|
||||||
|
- Allow **internal users** to view spend of teams they're a member of
|
||||||
|
|
||||||
|
[Read more](#management-endpoints--ui)
|
||||||
|
|
||||||
|
## Unified Responses API
|
||||||
|
|
||||||
|
This release allows you to call Azure OpenAI, Anthropic, AWS Bedrock, and Google Vertex AI models via the POST /v1/responses endpoint on LiteLLM. This means you can now use popular tools like [OpenAI Codex](https://docs.litellm.ai/docs/tutorials/openai_codex) with your own models.
|
||||||
|
|
||||||
|
<Image img={require('../../img/release_notes/unified_responses_api_rn.png')}/>
|
||||||
|
|
||||||
|
|
||||||
|
[Read more](https://docs.litellm.ai/docs/response_api)
|
||||||
|
|
||||||
|
|
||||||
|
## New Models / Updated Models
|
||||||
|
|
||||||
|
- **OpenAI**
|
||||||
|
1. gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3, o3-mini, o4-mini pricing - [Get Started](../../docs/providers/openai#usage), [PR](https://github.com/BerriAI/litellm/pull/9990)
|
||||||
|
2. o4 - correctly map o4 to openai o_series model
|
||||||
|
- **Azure AI**
|
||||||
|
1. Phi-4 output cost per token fix - [PR](https://github.com/BerriAI/litellm/pull/9880)
|
||||||
|
2. Responses API support - [Get Started](../../docs/providers/azure#azure-responses-api), [PR](https://github.com/BerriAI/litellm/pull/10116)
|
||||||
|
- **Anthropic**
|
||||||
|
1. Redacted message thinking support - [Get Started](../../docs/providers/anthropic#usage---thinking--reasoning_content), [PR](https://github.com/BerriAI/litellm/pull/10129)
|
||||||
|
- **Cohere**
|
||||||
|
1. `/v2/chat` Passthrough endpoint support w/ cost tracking - [Get Started](../../docs/pass_through/cohere), [PR](https://github.com/BerriAI/litellm/pull/9997)
|
||||||
|
- **Azure**
|
||||||
|
1. Support azure tenant_id/client_id env vars - [Get Started](../../docs/providers/azure#entra-id---use-tenant_id-client_id-client_secret), [PR](https://github.com/BerriAI/litellm/pull/9993)
|
||||||
|
2. Fix response_format check for 2025+ api versions - [PR](https://github.com/BerriAI/litellm/pull/9993)
|
||||||
|
3. Add gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3, o3-mini, o4-mini pricing
|
||||||
|
- **VLLM**
|
||||||
|
1. Files - Support 'file' message type for VLLM video URLs - [Get Started](../../docs/providers/vllm#send-video-url-to-vllm), [PR](https://github.com/BerriAI/litellm/pull/10129)
|
||||||
|
2. Passthrough - new `/vllm/` passthrough endpoint support [Get Started](../../docs/pass_through/vllm), [PR](https://github.com/BerriAI/litellm/pull/10002)
|
||||||
|
- **Mistral**
|
||||||
|
1. new `/mistral` passthrough endpoint support [Get Started](../../docs/pass_through/mistral), [PR](https://github.com/BerriAI/litellm/pull/10002)
|
||||||
|
- **AWS**
|
||||||
|
1. New mapped bedrock regions - [PR](https://github.com/BerriAI/litellm/pull/9430)
|
||||||
|
- **VertexAI / Google AI Studio**
|
||||||
|
1. Gemini - Response format - Retain schema field ordering for google gemini and vertex by specifying propertyOrdering - [Get Started](../../docs/providers/vertex#json-schema), [PR](https://github.com/BerriAI/litellm/pull/9828)
|
||||||
|
2. Gemini-2.5-flash - return reasoning content [Google AI Studio](../../docs/providers/gemini#usage---thinking--reasoning_content), [Vertex AI](../../docs/providers/vertex#thinking--reasoning_content)
|
||||||
|
3. Gemini-2.5-flash - pricing + model information [PR](https://github.com/BerriAI/litellm/pull/10125)
|
||||||
|
4. Passthrough - new `/vertex_ai/discovery` route - enables calling AgentBuilder API routes [Get Started](../../docs/pass_through/vertex_ai#supported-api-endpoints), [PR](https://github.com/BerriAI/litellm/pull/10084)
|
||||||
|
- **Fireworks AI**
|
||||||
|
1. Return tool calling responses in the `tool_calls` field (Fireworks AI incorrectly returns this as a JSON string in `content`) - [PR](https://github.com/BerriAI/litellm/pull/10130)
|
||||||
|
- **Triton**
|
||||||
|
1. Remove fixed bad_words / stop words from `/generate` call - [Get Started](../../docs/providers/triton-inference-server#triton-generate---chat-completion), [PR](https://github.com/BerriAI/litellm/pull/10163)
|
||||||
|
- **Other**
|
||||||
|
1. Support for all litellm providers on Responses API (works with Codex) - [Get Started](../../docs/tutorials/openai_codex), [PR](https://github.com/BerriAI/litellm/pull/10132)
|
||||||
|
2. Fix combining multiple tool calls in streaming response - [Get Started](../../docs/completion/stream#helper-function), [PR](https://github.com/BerriAI/litellm/pull/10040)
|
||||||
|
|
||||||
|
|
||||||
|
## Spend Tracking Improvements
|
||||||
|
|
||||||
|
- **Cost Control** - inject cache control points in prompt for cost reduction [Get Started](../../docs/tutorials/prompt_caching), [PR](https://github.com/BerriAI/litellm/pull/10000)
|
||||||
|
- **Spend Tags** - spend tags in headers - support x-litellm-tags even if tag based routing not enabled [Get Started](../../docs/proxy/request_headers#litellm-headers), [PR](https://github.com/BerriAI/litellm/pull/10000)
|
||||||
|
- **Gemini-2.5-flash** - support cost calculation for reasoning tokens [PR](https://github.com/BerriAI/litellm/pull/10141)
|
||||||
|
|
||||||
|
## Management Endpoints / UI
|
||||||
|
- **Users**
|
||||||
|
1. Show created_at and updated_at on users page - [PR](https://github.com/BerriAI/litellm/pull/10033)
|
||||||
|
- **Virtual Keys**
|
||||||
|
1. Filter by key alias - https://github.com/BerriAI/litellm/pull/10085
|
||||||
|
- **Usage Tab**
|
||||||
|
|
||||||
|
1. Team based usage
|
||||||
|
|
||||||
|
- New `LiteLLM_DailyTeamSpend` Table for aggregate team based usage logging - [PR](https://github.com/BerriAI/litellm/pull/10039)
|
||||||
|
|
||||||
|
- New Team based usage dashboard + new `/team/daily/activity` API - [PR](https://github.com/BerriAI/litellm/pull/10081)
|
||||||
|
- Return team alias on /team/daily/activity API - [PR](https://github.com/BerriAI/litellm/pull/10157)
|
||||||
|
- allow internal users to view spend for teams they belong to - [PR](https://github.com/BerriAI/litellm/pull/10157)
|
||||||
|
- allow viewing top keys by team - [PR](https://github.com/BerriAI/litellm/pull/10157)
|
||||||
|
|
||||||
|
<Image img={require('../../img/release_notes/new_team_usage.png')}/>
|
||||||
|
|
||||||
|
2. Tag Based Usage
|
||||||
|
- New `LiteLLM_DailyTagSpend` Table for aggregate tag based usage logging - [PR](https://github.com/BerriAI/litellm/pull/10071)
|
||||||
|
- Restrict to only Proxy Admins - [PR](https://github.com/BerriAI/litellm/pull/10157)
|
||||||
|
- allow viewing top keys by tag
|
||||||
|
- Return tags passed in request (i.e. dynamic tags) on `/tag/list` API - [PR](https://github.com/BerriAI/litellm/pull/10157)
|
||||||
|
<Image img={require('../../img/release_notes/new_tag_usage.png')}/>
|
||||||
|
3. Track prompt caching metrics in daily user, team, tag tables - [PR](https://github.com/BerriAI/litellm/pull/10029)
|
||||||
|
4. Show usage by key (on all up, team, and tag usage dashboards) - [PR](https://github.com/BerriAI/litellm/pull/10157)
|
||||||
|
5. Swap the old usage tab with the new usage tab
|
||||||
|
- **Models**
|
||||||
|
1. Make columns resizable/hideable - [PR](https://github.com/BerriAI/litellm/pull/10119)
|
||||||
|
- **API Playground**
|
||||||
|
1. Allow internal users to call the API playground - [PR](https://github.com/BerriAI/litellm/pull/10157)
|
||||||
|
- **SCIM**
|
||||||
|
1. Add LiteLLM SCIM Integration for Team and User management - [Get Started](../../docs/tutorials/scim_litellm), [PR](https://github.com/BerriAI/litellm/pull/10072)
|
||||||
|
|
||||||
|
|
||||||
|
## Logging / Guardrail Integrations
|
||||||
|
- **GCS**
|
||||||
|
1. Fix gcs pub sub logging with env var GCS_PROJECT_ID - [Get Started](../../docs/observability/gcs_bucket_integration#usage), [PR](https://github.com/BerriAI/litellm/pull/10042)
|
||||||
|
- **AIM**
|
||||||
|
1. Add litellm call id passing to Aim guardrails on pre and post-hooks calls - [Get Started](../../docs/proxy/guardrails/aim_security), [PR](https://github.com/BerriAI/litellm/pull/10021)
|
||||||
|
- **Azure blob storage**
|
||||||
|
1. Ensure logging works in high throughput scenarios - [Get Started](../../docs/proxy/logging#azure-blob-storage), [PR](https://github.com/BerriAI/litellm/pull/9962)
|
||||||
|
|
||||||
|
## General Proxy Improvements
|
||||||
|
|
||||||
|
- **Support setting `litellm.modify_params` via env var** [PR](https://github.com/BerriAI/litellm/pull/9964)
|
||||||
|
- **Model Discovery** - Check provider’s `/models` endpoints when calling proxy’s `/v1/models` endpoint - [Get Started](../../docs/proxy/model_discovery), [PR](https://github.com/BerriAI/litellm/pull/9958)
|
||||||
|
- **`/utils/token_counter`** - fix retrieving custom tokenizer for db models - [Get Started](../../docs/proxy/configs#set-custom-tokenizer), [PR](https://github.com/BerriAI/litellm/pull/10047)
|
||||||
|
- **Prisma migrate** - handle existing columns in db table - [PR](https://github.com/BerriAI/litellm/pull/10138)
|
||||||
|
|
|
@ -69,6 +69,7 @@ const sidebars = {
|
||||||
"proxy/clientside_auth",
|
"proxy/clientside_auth",
|
||||||
"proxy/request_headers",
|
"proxy/request_headers",
|
||||||
"proxy/response_headers",
|
"proxy/response_headers",
|
||||||
|
"proxy/model_discovery",
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -101,6 +102,7 @@ const sidebars = {
|
||||||
"proxy/admin_ui_sso",
|
"proxy/admin_ui_sso",
|
||||||
"proxy/self_serve",
|
"proxy/self_serve",
|
||||||
"proxy/public_teams",
|
"proxy/public_teams",
|
||||||
|
"tutorials/scim_litellm",
|
||||||
"proxy/custom_sso",
|
"proxy/custom_sso",
|
||||||
"proxy/ui_credentials",
|
"proxy/ui_credentials",
|
||||||
"proxy/ui_logs"
|
"proxy/ui_logs"
|
||||||
|
@ -330,6 +332,8 @@ const sidebars = {
|
||||||
"pass_through/vertex_ai",
|
"pass_through/vertex_ai",
|
||||||
"pass_through/google_ai_studio",
|
"pass_through/google_ai_studio",
|
||||||
"pass_through/cohere",
|
"pass_through/cohere",
|
||||||
|
"pass_through/vllm",
|
||||||
|
"pass_through/mistral",
|
||||||
"pass_through/openai_passthrough",
|
"pass_through/openai_passthrough",
|
||||||
"pass_through/anthropic_completion",
|
"pass_through/anthropic_completion",
|
||||||
"pass_through/bedrock",
|
"pass_through/bedrock",
|
||||||
|
@ -443,6 +447,7 @@ const sidebars = {
|
||||||
label: "Tutorials",
|
label: "Tutorials",
|
||||||
items: [
|
items: [
|
||||||
"tutorials/openweb_ui",
|
"tutorials/openweb_ui",
|
||||||
|
"tutorials/openai_codex",
|
||||||
"tutorials/msft_sso",
|
"tutorials/msft_sso",
|
||||||
"tutorials/prompt_caching",
|
"tutorials/prompt_caching",
|
||||||
"tutorials/tag_management",
|
"tutorials/tag_management",
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import glob
|
import glob
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
@ -82,6 +83,26 @@ class ProxyExtrasDBManager:
|
||||||
logger.info(f"Found {len(migration_paths)} migrations at {migrations_dir}")
|
logger.info(f"Found {len(migration_paths)} migrations at {migrations_dir}")
|
||||||
return [Path(p).parent.name for p in migration_paths]
|
return [Path(p).parent.name for p in migration_paths]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _roll_back_migration(migration_name: str):
|
||||||
|
"""Mark a specific migration as rolled back"""
|
||||||
|
subprocess.run(
|
||||||
|
["prisma", "migrate", "resolve", "--rolled-back", migration_name],
|
||||||
|
timeout=60,
|
||||||
|
check=True,
|
||||||
|
capture_output=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _resolve_specific_migration(migration_name: str):
|
||||||
|
"""Mark a specific migration as applied"""
|
||||||
|
subprocess.run(
|
||||||
|
["prisma", "migrate", "resolve", "--applied", migration_name],
|
||||||
|
timeout=60,
|
||||||
|
check=True,
|
||||||
|
capture_output=True,
|
||||||
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _resolve_all_migrations(migrations_dir: str):
|
def _resolve_all_migrations(migrations_dir: str):
|
||||||
"""Mark all existing migrations as applied"""
|
"""Mark all existing migrations as applied"""
|
||||||
|
@ -141,7 +162,34 @@ class ProxyExtrasDBManager:
|
||||||
return True
|
return True
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
logger.info(f"prisma db error: {e.stderr}, e: {e.stdout}")
|
logger.info(f"prisma db error: {e.stderr}, e: {e.stdout}")
|
||||||
if (
|
if "P3009" in e.stderr:
|
||||||
|
# Extract the failed migration name from the error message
|
||||||
|
migration_match = re.search(
|
||||||
|
r"`(\d+_.*)` migration", e.stderr
|
||||||
|
)
|
||||||
|
if migration_match:
|
||||||
|
failed_migration = migration_match.group(1)
|
||||||
|
logger.info(
|
||||||
|
f"Found failed migration: {failed_migration}, marking as rolled back"
|
||||||
|
)
|
||||||
|
# Mark the failed migration as rolled back
|
||||||
|
subprocess.run(
|
||||||
|
[
|
||||||
|
"prisma",
|
||||||
|
"migrate",
|
||||||
|
"resolve",
|
||||||
|
"--rolled-back",
|
||||||
|
failed_migration,
|
||||||
|
],
|
||||||
|
timeout=60,
|
||||||
|
check=True,
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"✅ Migration {failed_migration} marked as rolled back... retrying"
|
||||||
|
)
|
||||||
|
elif (
|
||||||
"P3005" in e.stderr
|
"P3005" in e.stderr
|
||||||
and "database schema is not empty" in e.stderr
|
and "database schema is not empty" in e.stderr
|
||||||
):
|
):
|
||||||
|
@ -155,6 +203,29 @@ class ProxyExtrasDBManager:
|
||||||
ProxyExtrasDBManager._resolve_all_migrations(migrations_dir)
|
ProxyExtrasDBManager._resolve_all_migrations(migrations_dir)
|
||||||
logger.info("✅ All migrations resolved.")
|
logger.info("✅ All migrations resolved.")
|
||||||
return True
|
return True
|
||||||
|
elif (
|
||||||
|
"P3018" in e.stderr
|
||||||
|
): # PostgreSQL error code for duplicate column
|
||||||
|
logger.info(
|
||||||
|
"Migration already exists, resolving specific migration"
|
||||||
|
)
|
||||||
|
# Extract the migration name from the error message
|
||||||
|
migration_match = re.search(
|
||||||
|
r"Migration name: (\d+_.*)", e.stderr
|
||||||
|
)
|
||||||
|
if migration_match:
|
||||||
|
migration_name = migration_match.group(1)
|
||||||
|
logger.info(f"Rolling back migration {migration_name}")
|
||||||
|
ProxyExtrasDBManager._roll_back_migration(
|
||||||
|
migration_name
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"Resolving migration {migration_name} that failed due to existing columns"
|
||||||
|
)
|
||||||
|
ProxyExtrasDBManager._resolve_specific_migration(
|
||||||
|
migration_name
|
||||||
|
)
|
||||||
|
logger.info("✅ Migration resolved.")
|
||||||
else:
|
else:
|
||||||
# Use prisma db push with increased timeout
|
# Use prisma db push with increased timeout
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "litellm-proxy-extras"
|
name = "litellm-proxy-extras"
|
||||||
version = "0.1.10"
|
version = "0.1.11"
|
||||||
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
|
description = "Additional files for the LiteLLM Proxy. Reduces the size of the main litellm package."
|
||||||
authors = ["BerriAI"]
|
authors = ["BerriAI"]
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
@ -22,7 +22,7 @@ requires = ["poetry-core"]
|
||||||
build-backend = "poetry.core.masonry.api"
|
build-backend = "poetry.core.masonry.api"
|
||||||
|
|
||||||
[tool.commitizen]
|
[tool.commitizen]
|
||||||
version = "0.1.10"
|
version = "0.1.11"
|
||||||
version_files = [
|
version_files = [
|
||||||
"pyproject.toml:version",
|
"pyproject.toml:version",
|
||||||
"../requirements.txt:litellm-proxy-extras==",
|
"../requirements.txt:litellm-proxy-extras==",
|
||||||
|
|
|
@ -304,6 +304,11 @@ def create_assistants(
|
||||||
"response_format": response_format,
|
"response_format": response_format,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# only send params that are not None
|
||||||
|
create_assistant_data = {
|
||||||
|
k: v for k, v in create_assistant_data.items() if v is not None
|
||||||
|
}
|
||||||
|
|
||||||
response: Optional[Union[Coroutine[Any, Any, Assistant], Assistant]] = None
|
response: Optional[Union[Coroutine[Any, Any, Assistant], Assistant]] = None
|
||||||
if custom_llm_provider == "openai":
|
if custom_llm_provider == "openai":
|
||||||
api_base = (
|
api_base = (
|
||||||
|
|
|
@ -21,6 +21,10 @@ DEFAULT_MAX_TOKENS = 256 # used when providers need a default
|
||||||
MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB
|
MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024 # 1MB = 1024KB
|
||||||
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
|
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
|
||||||
|
|
||||||
|
DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET = 1024
|
||||||
|
DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET = 2048
|
||||||
|
DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET = 4096
|
||||||
|
|
||||||
########## Networking constants ##############################################################
|
########## Networking constants ##############################################################
|
||||||
_DEFAULT_TTL_FOR_HTTPX_CLIENTS = 3600 # 1 hour, re-use the same httpx client for 1 hour
|
_DEFAULT_TTL_FOR_HTTPX_CLIENTS = 3600 # 1 hour, re-use the same httpx client for 1 hour
|
||||||
|
|
||||||
|
|
|
@ -265,8 +265,10 @@ def generic_cost_per_token(
|
||||||
)
|
)
|
||||||
|
|
||||||
## CALCULATE OUTPUT COST
|
## CALCULATE OUTPUT COST
|
||||||
text_tokens = usage.completion_tokens
|
text_tokens = 0
|
||||||
audio_tokens = 0
|
audio_tokens = 0
|
||||||
|
reasoning_tokens = 0
|
||||||
|
is_text_tokens_total = False
|
||||||
if usage.completion_tokens_details is not None:
|
if usage.completion_tokens_details is not None:
|
||||||
audio_tokens = (
|
audio_tokens = (
|
||||||
cast(
|
cast(
|
||||||
|
@ -280,9 +282,20 @@ def generic_cost_per_token(
|
||||||
Optional[int],
|
Optional[int],
|
||||||
getattr(usage.completion_tokens_details, "text_tokens", None),
|
getattr(usage.completion_tokens_details, "text_tokens", None),
|
||||||
)
|
)
|
||||||
or usage.completion_tokens # default to completion tokens, if this field is not set
|
or 0 # default to completion tokens, if this field is not set
|
||||||
|
)
|
||||||
|
reasoning_tokens = (
|
||||||
|
cast(
|
||||||
|
Optional[int],
|
||||||
|
getattr(usage.completion_tokens_details, "reasoning_tokens", 0),
|
||||||
|
)
|
||||||
|
or 0
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if text_tokens == 0:
|
||||||
|
text_tokens = usage.completion_tokens
|
||||||
|
if text_tokens == usage.completion_tokens:
|
||||||
|
is_text_tokens_total = True
|
||||||
## TEXT COST
|
## TEXT COST
|
||||||
completion_cost = float(text_tokens) * completion_base_cost
|
completion_cost = float(text_tokens) * completion_base_cost
|
||||||
|
|
||||||
|
@ -290,12 +303,26 @@ def generic_cost_per_token(
|
||||||
"output_cost_per_audio_token"
|
"output_cost_per_audio_token"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
_output_cost_per_reasoning_token: Optional[float] = model_info.get(
|
||||||
|
"output_cost_per_reasoning_token"
|
||||||
|
)
|
||||||
|
|
||||||
## AUDIO COST
|
## AUDIO COST
|
||||||
if (
|
if not is_text_tokens_total and audio_tokens is not None and audio_tokens > 0:
|
||||||
_output_cost_per_audio_token is not None
|
_output_cost_per_audio_token = (
|
||||||
and audio_tokens is not None
|
_output_cost_per_audio_token
|
||||||
and audio_tokens > 0
|
if _output_cost_per_audio_token is not None
|
||||||
):
|
else completion_base_cost
|
||||||
|
)
|
||||||
completion_cost += float(audio_tokens) * _output_cost_per_audio_token
|
completion_cost += float(audio_tokens) * _output_cost_per_audio_token
|
||||||
|
|
||||||
|
## REASONING COST
|
||||||
|
if not is_text_tokens_total and reasoning_tokens and reasoning_tokens > 0:
|
||||||
|
_output_cost_per_reasoning_token = (
|
||||||
|
_output_cost_per_reasoning_token
|
||||||
|
if _output_cost_per_reasoning_token is not None
|
||||||
|
else completion_base_cost
|
||||||
|
)
|
||||||
|
completion_cost += float(reasoning_tokens) * _output_cost_per_reasoning_token
|
||||||
|
|
||||||
return prompt_cost, completion_cost
|
return prompt_cost, completion_cost
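To make the new output-cost split concrete, here is a minimal worked sketch of the arithmetic the hunk above implements. The token counts and per-token rates are illustrative values, not real model pricing; the fallback to the text rate mirrors the diff (audio/reasoning rates default to the completion rate when the model info does not define them).

```python
# Illustrative output-cost split: text + audio + reasoning tokens.
completion_base_cost = 8e-6             # $/text output token (assumed)
output_cost_per_audio_token = None      # not defined for this model (assumed)
output_cost_per_reasoning_token = 1e-5  # $/reasoning token (assumed)

completion_tokens = 200   # total output tokens
text_tokens = 120         # completion_tokens_details.text_tokens
audio_tokens = 0
reasoning_tokens = 80     # completion_tokens_details.reasoning_tokens

if text_tokens == 0:
    text_tokens = completion_tokens   # fall back to the total if details are missing
is_text_tokens_total = text_tokens == completion_tokens

completion_cost = text_tokens * completion_base_cost

# Audio / reasoning tokens are only billed separately when text_tokens
# did not already cover the full completion.
if not is_text_tokens_total and audio_tokens > 0:
    completion_cost += audio_tokens * (output_cost_per_audio_token or completion_base_cost)
if not is_text_tokens_total and reasoning_tokens > 0:
    completion_cost += reasoning_tokens * (output_cost_per_reasoning_token or completion_base_cost)

print(f"output cost: ${completion_cost:.6f}")  # 120*8e-6 + 80*1e-5 = 0.001760
```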
|
||||||
|
|
|
@ -14,6 +14,7 @@ from litellm.types.llms.openai import ChatCompletionThinkingBlock
|
||||||
from litellm.types.utils import (
|
from litellm.types.utils import (
|
||||||
ChatCompletionDeltaToolCall,
|
ChatCompletionDeltaToolCall,
|
||||||
ChatCompletionMessageToolCall,
|
ChatCompletionMessageToolCall,
|
||||||
|
ChatCompletionRedactedThinkingBlock,
|
||||||
Choices,
|
Choices,
|
||||||
Delta,
|
Delta,
|
||||||
EmbeddingResponse,
|
EmbeddingResponse,
|
||||||
|
@ -486,7 +487,14 @@ def convert_to_model_response_object( # noqa: PLR0915
|
||||||
)
|
)
|
||||||
|
|
||||||
# Handle thinking models that display `thinking_blocks` within `content`
|
# Handle thinking models that display `thinking_blocks` within `content`
|
||||||
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
|
thinking_blocks: Optional[
|
||||||
|
List[
|
||||||
|
Union[
|
||||||
|
ChatCompletionThinkingBlock,
|
||||||
|
ChatCompletionRedactedThinkingBlock,
|
||||||
|
]
|
||||||
|
]
|
||||||
|
] = None
|
||||||
if "thinking_blocks" in choice["message"]:
|
if "thinking_blocks" in choice["message"]:
|
||||||
thinking_blocks = choice["message"]["thinking_blocks"]
|
thinking_blocks = choice["message"]["thinking_blocks"]
|
||||||
provider_specific_fields["thinking_blocks"] = thinking_blocks
|
provider_specific_fields["thinking_blocks"] = thinking_blocks
|
||||||
|
|
|
@ -471,3 +471,59 @@ def unpack_defs(schema, defs):
|
||||||
unpack_defs(ref, defs)
|
unpack_defs(ref, defs)
|
||||||
value["items"] = ref
|
value["items"] = ref
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
||||||
|
def _get_image_mime_type_from_url(url: str) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Get mime type for common image URLs
|
||||||
|
See gemini mime types: https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements
|
||||||
|
|
||||||
|
Supported by Gemini:
|
||||||
|
application/pdf
|
||||||
|
audio/mpeg
|
||||||
|
audio/mp3
|
||||||
|
audio/wav
|
||||||
|
image/png
|
||||||
|
image/jpeg
|
||||||
|
image/webp
|
||||||
|
text/plain
|
||||||
|
video/mov
|
||||||
|
video/mpeg
|
||||||
|
video/mp4
|
||||||
|
video/mpg
|
||||||
|
video/avi
|
||||||
|
video/wmv
|
||||||
|
video/mpegps
|
||||||
|
video/flv
|
||||||
|
"""
|
||||||
|
url = url.lower()
|
||||||
|
|
||||||
|
# Map file extensions to mime types
|
||||||
|
mime_types = {
|
||||||
|
# Images
|
||||||
|
(".jpg", ".jpeg"): "image/jpeg",
|
||||||
|
(".png",): "image/png",
|
||||||
|
(".webp",): "image/webp",
|
||||||
|
# Videos
|
||||||
|
(".mp4",): "video/mp4",
|
||||||
|
(".mov",): "video/mov",
|
||||||
|
(".mpeg", ".mpg"): "video/mpeg",
|
||||||
|
(".avi",): "video/avi",
|
||||||
|
(".wmv",): "video/wmv",
|
||||||
|
(".mpegps",): "video/mpegps",
|
||||||
|
(".flv",): "video/flv",
|
||||||
|
# Audio
|
||||||
|
(".mp3",): "audio/mp3",
|
||||||
|
(".wav",): "audio/wav",
|
||||||
|
(".mpeg",): "audio/mpeg",
|
||||||
|
# Documents
|
||||||
|
(".pdf",): "application/pdf",
|
||||||
|
(".txt",): "text/plain",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check each extension group against the URL
|
||||||
|
for extensions, mime_type in mime_types.items():
|
||||||
|
if any(url.endswith(ext) for ext in extensions):
|
||||||
|
return mime_type
|
||||||
|
|
||||||
|
return None
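A quick usage check for the helper above; these calls assume they run in the same module as the function (the import path is not shown in the diff):

```python
# Illustrative checks for _get_image_mime_type_from_url (defined above).
assert _get_image_mime_type_from_url("https://example.com/receipt.PNG") == "image/png"
assert _get_image_mime_type_from_url("https://example.com/clip.mp4") == "video/mp4"
assert _get_image_mime_type_from_url("https://example.com/unknown.bin") is None
```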
|
||||||
|
|
|
@ -2258,6 +2258,14 @@ def _parse_content_type(content_type: str) -> str:
|
||||||
return m.get_content_type()
|
return m.get_content_type()
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_mime_type(base64_data: str) -> Optional[str]:
|
||||||
|
mime_type_match = re.match(r"data:(.*?);base64", base64_data)
|
||||||
|
if mime_type_match:
|
||||||
|
return mime_type_match.group(1)
|
||||||
|
else:
|
||||||
|
return None
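Similarly, a usage check for `_parse_mime_type`, again assuming it is called next to the definition above:

```python
# Illustrative: extract the mime type from an OpenAI-style base64 data URI.
assert _parse_mime_type("data:image/jpeg;base64,/9j/4AAQ") == "image/jpeg"
assert _parse_mime_type("not-a-data-uri") is None
```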
|
||||||
|
|
||||||
|
|
||||||
class BedrockImageProcessor:
|
class BedrockImageProcessor:
|
||||||
"""Handles both sync and async image processing for Bedrock conversations."""
|
"""Handles both sync and async image processing for Bedrock conversations."""
|
||||||
|
|
||||||
|
|
|
@ -29,6 +29,7 @@ from litellm.types.llms.anthropic import (
|
||||||
UsageDelta,
|
UsageDelta,
|
||||||
)
|
)
|
||||||
from litellm.types.llms.openai import (
|
from litellm.types.llms.openai import (
|
||||||
|
ChatCompletionRedactedThinkingBlock,
|
||||||
ChatCompletionThinkingBlock,
|
ChatCompletionThinkingBlock,
|
||||||
ChatCompletionToolCallChunk,
|
ChatCompletionToolCallChunk,
|
||||||
)
|
)
|
||||||
|
@ -501,18 +502,19 @@ class ModelResponseIterator:
|
||||||
) -> Tuple[
|
) -> Tuple[
|
||||||
str,
|
str,
|
||||||
Optional[ChatCompletionToolCallChunk],
|
Optional[ChatCompletionToolCallChunk],
|
||||||
List[ChatCompletionThinkingBlock],
|
List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]],
|
||||||
Dict[str, Any],
|
Dict[str, Any],
|
||||||
]:
|
]:
|
||||||
"""
|
"""
|
||||||
Helper function to handle the content block delta
|
Helper function to handle the content block delta
|
||||||
"""
|
"""
|
||||||
|
|
||||||
text = ""
|
text = ""
|
||||||
tool_use: Optional[ChatCompletionToolCallChunk] = None
|
tool_use: Optional[ChatCompletionToolCallChunk] = None
|
||||||
provider_specific_fields = {}
|
provider_specific_fields = {}
|
||||||
content_block = ContentBlockDelta(**chunk) # type: ignore
|
content_block = ContentBlockDelta(**chunk) # type: ignore
|
||||||
thinking_blocks: List[ChatCompletionThinkingBlock] = []
|
thinking_blocks: List[
|
||||||
|
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
|
||||||
|
] = []
|
||||||
|
|
||||||
self.content_blocks.append(content_block)
|
self.content_blocks.append(content_block)
|
||||||
if "text" in content_block["delta"]:
|
if "text" in content_block["delta"]:
|
||||||
|
@ -541,20 +543,25 @@ class ModelResponseIterator:
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
provider_specific_fields["thinking_blocks"] = thinking_blocks
|
provider_specific_fields["thinking_blocks"] = thinking_blocks
|
||||||
|
|
||||||
return text, tool_use, thinking_blocks, provider_specific_fields
|
return text, tool_use, thinking_blocks, provider_specific_fields
|
||||||
|
|
||||||
def _handle_reasoning_content(
|
def _handle_reasoning_content(
|
||||||
self, thinking_blocks: List[ChatCompletionThinkingBlock]
|
self,
|
||||||
|
thinking_blocks: List[
|
||||||
|
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
|
||||||
|
],
|
||||||
) -> Optional[str]:
|
) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
Handle the reasoning content
|
Handle the reasoning content
|
||||||
"""
|
"""
|
||||||
reasoning_content = None
|
reasoning_content = None
|
||||||
for block in thinking_blocks:
|
for block in thinking_blocks:
|
||||||
|
thinking_content = cast(Optional[str], block.get("thinking"))
|
||||||
if reasoning_content is None:
|
if reasoning_content is None:
|
||||||
reasoning_content = ""
|
reasoning_content = ""
|
||||||
if "thinking" in block:
|
if thinking_content is not None:
|
||||||
reasoning_content += block["thinking"]
|
reasoning_content += thinking_content
|
||||||
return reasoning_content
|
return reasoning_content
|
||||||
|
|
||||||
def chunk_parser(self, chunk: dict) -> ModelResponseStream:
|
def chunk_parser(self, chunk: dict) -> ModelResponseStream:
|
||||||
|
@ -567,7 +574,13 @@ class ModelResponseIterator:
|
||||||
usage: Optional[Usage] = None
|
usage: Optional[Usage] = None
|
||||||
provider_specific_fields: Dict[str, Any] = {}
|
provider_specific_fields: Dict[str, Any] = {}
|
||||||
reasoning_content: Optional[str] = None
|
reasoning_content: Optional[str] = None
|
||||||
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
|
thinking_blocks: Optional[
|
||||||
|
List[
|
||||||
|
Union[
|
||||||
|
ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
|
||||||
|
]
|
||||||
|
]
|
||||||
|
] = None
|
||||||
|
|
||||||
index = int(chunk.get("index", 0))
|
index = int(chunk.get("index", 0))
|
||||||
if type_chunk == "content_block_delta":
|
if type_chunk == "content_block_delta":
|
||||||
|
@ -605,6 +618,15 @@ class ModelResponseIterator:
|
||||||
},
|
},
|
||||||
"index": self.tool_index,
|
"index": self.tool_index,
|
||||||
}
|
}
|
||||||
|
elif (
|
||||||
|
content_block_start["content_block"]["type"] == "redacted_thinking"
|
||||||
|
):
|
||||||
|
thinking_blocks = [
|
||||||
|
ChatCompletionRedactedThinkingBlock(
|
||||||
|
type="redacted_thinking",
|
||||||
|
data=content_block_start["content_block"]["data"],
|
||||||
|
)
|
||||||
|
]
|
||||||
elif type_chunk == "content_block_stop":
|
elif type_chunk == "content_block_stop":
|
||||||
ContentBlockStop(**chunk) # type: ignore
|
ContentBlockStop(**chunk) # type: ignore
|
||||||
# check if tool call content block
|
# check if tool call content block
|
||||||
|
|
|
@ -7,6 +7,9 @@ import httpx
|
||||||
import litellm
|
import litellm
|
||||||
from litellm.constants import (
|
from litellm.constants import (
|
||||||
DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS,
|
DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS,
|
||||||
|
DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET,
|
||||||
|
DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
|
||||||
|
DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET,
|
||||||
RESPONSE_FORMAT_TOOL_NAME,
|
RESPONSE_FORMAT_TOOL_NAME,
|
||||||
)
|
)
|
||||||
from litellm.litellm_core_utils.core_helpers import map_finish_reason
|
from litellm.litellm_core_utils.core_helpers import map_finish_reason
|
||||||
|
@ -27,6 +30,7 @@ from litellm.types.llms.openai import (
|
||||||
REASONING_EFFORT,
|
REASONING_EFFORT,
|
||||||
AllMessageValues,
|
AllMessageValues,
|
||||||
ChatCompletionCachedContent,
|
ChatCompletionCachedContent,
|
||||||
|
ChatCompletionRedactedThinkingBlock,
|
||||||
ChatCompletionSystemMessage,
|
ChatCompletionSystemMessage,
|
||||||
ChatCompletionThinkingBlock,
|
ChatCompletionThinkingBlock,
|
||||||
ChatCompletionToolCallChunk,
|
ChatCompletionToolCallChunk,
|
||||||
|
@ -276,11 +280,20 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
|
||||||
if reasoning_effort is None:
|
if reasoning_effort is None:
|
||||||
return None
|
return None
|
||||||
elif reasoning_effort == "low":
|
elif reasoning_effort == "low":
|
||||||
return AnthropicThinkingParam(type="enabled", budget_tokens=1024)
|
return AnthropicThinkingParam(
|
||||||
|
type="enabled",
|
||||||
|
budget_tokens=DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
|
||||||
|
)
|
||||||
elif reasoning_effort == "medium":
|
elif reasoning_effort == "medium":
|
||||||
return AnthropicThinkingParam(type="enabled", budget_tokens=2048)
|
return AnthropicThinkingParam(
|
||||||
|
type="enabled",
|
||||||
|
budget_tokens=DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET,
|
||||||
|
)
|
||||||
elif reasoning_effort == "high":
|
elif reasoning_effort == "high":
|
||||||
return AnthropicThinkingParam(type="enabled", budget_tokens=4096)
|
return AnthropicThinkingParam(
|
||||||
|
type="enabled",
|
||||||
|
budget_tokens=DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}")
|
raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}")
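For reference, a minimal sketch of the mapping this hunk introduces, with the budget values mirroring the `DEFAULT_REASONING_EFFORT_*_THINKING_BUDGET` constants added earlier in this diff (the helper name here is illustrative, not LiteLLM's API):

```python
# Sketch: OpenAI-style reasoning_effort -> Anthropic "thinking" budget_tokens.
BUDGETS = {"low": 1024, "medium": 2048, "high": 4096}

def thinking_param(reasoning_effort: str) -> dict:
    if reasoning_effort not in BUDGETS:
        raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}")
    return {"type": "enabled", "budget_tokens": BUDGETS[reasoning_effort]}

print(thinking_param("medium"))  # {'type': 'enabled', 'budget_tokens': 2048}
```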
|
||||||
|
|
||||||
|
@ -563,13 +576,21 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
|
||||||
) -> Tuple[
|
) -> Tuple[
|
||||||
str,
|
str,
|
||||||
Optional[List[Any]],
|
Optional[List[Any]],
|
||||||
Optional[List[ChatCompletionThinkingBlock]],
|
Optional[
|
||||||
|
List[
|
||||||
|
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
|
||||||
|
]
|
||||||
|
],
|
||||||
Optional[str],
|
Optional[str],
|
||||||
List[ChatCompletionToolCallChunk],
|
List[ChatCompletionToolCallChunk],
|
||||||
]:
|
]:
|
||||||
text_content = ""
|
text_content = ""
|
||||||
citations: Optional[List[Any]] = None
|
citations: Optional[List[Any]] = None
|
||||||
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
|
thinking_blocks: Optional[
|
||||||
|
List[
|
||||||
|
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
|
||||||
|
]
|
||||||
|
] = None
|
||||||
reasoning_content: Optional[str] = None
|
reasoning_content: Optional[str] = None
|
||||||
tool_calls: List[ChatCompletionToolCallChunk] = []
|
tool_calls: List[ChatCompletionToolCallChunk] = []
|
||||||
for idx, content in enumerate(completion_response["content"]):
|
for idx, content in enumerate(completion_response["content"]):
|
||||||
|
@ -588,20 +609,30 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
|
||||||
index=idx,
|
index=idx,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
## CITATIONS
|
|
||||||
if content.get("citations", None) is not None:
|
elif content.get("thinking", None) is not None:
|
||||||
if citations is None:
|
|
||||||
citations = []
|
|
||||||
citations.append(content["citations"])
|
|
||||||
if content.get("thinking", None) is not None:
|
|
||||||
if thinking_blocks is None:
|
if thinking_blocks is None:
|
||||||
thinking_blocks = []
|
thinking_blocks = []
|
||||||
thinking_blocks.append(cast(ChatCompletionThinkingBlock, content))
|
thinking_blocks.append(cast(ChatCompletionThinkingBlock, content))
|
||||||
|
elif content["type"] == "redacted_thinking":
|
||||||
|
if thinking_blocks is None:
|
||||||
|
thinking_blocks = []
|
||||||
|
thinking_blocks.append(
|
||||||
|
cast(ChatCompletionRedactedThinkingBlock, content)
|
||||||
|
)
|
||||||
|
|
||||||
|
## CITATIONS
|
||||||
|
if content.get("citations") is not None:
|
||||||
|
if citations is None:
|
||||||
|
citations = []
|
||||||
|
citations.append(content["citations"])
|
||||||
if thinking_blocks is not None:
|
if thinking_blocks is not None:
|
||||||
reasoning_content = ""
|
reasoning_content = ""
|
||||||
for block in thinking_blocks:
|
for block in thinking_blocks:
|
||||||
if "thinking" in block:
|
thinking_content = cast(Optional[str], block.get("thinking"))
|
||||||
reasoning_content += block["thinking"]
|
if thinking_content is not None:
|
||||||
|
reasoning_content += thinking_content
|
||||||
|
|
||||||
return text_content, citations, thinking_blocks, reasoning_content, tool_calls
|
return text_content, citations, thinking_blocks, reasoning_content, tool_calls
|
||||||
|
|
||||||
def calculate_usage(
|
def calculate_usage(
|
||||||
|
@ -691,7 +722,13 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
|
||||||
else:
|
else:
|
||||||
text_content = ""
|
text_content = ""
|
||||||
citations: Optional[List[Any]] = None
|
citations: Optional[List[Any]] = None
|
||||||
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
|
thinking_blocks: Optional[
|
||||||
|
List[
|
||||||
|
Union[
|
||||||
|
ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
|
||||||
|
]
|
||||||
|
]
|
||||||
|
] = None
|
||||||
reasoning_content: Optional[str] = None
|
reasoning_content: Optional[str] = None
|
||||||
tool_calls: List[ChatCompletionToolCallChunk] = []
|
tool_calls: List[ChatCompletionToolCallChunk] = []
|
||||||
|
|
||||||
|
|
|
@ -288,6 +288,7 @@ class AzureAssistantsAPI(BaseAzureLLM):
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
max_retries=max_retries,
|
max_retries=max_retries,
|
||||||
client=client,
|
client=client,
|
||||||
|
litellm_params=litellm_params,
|
||||||
)
|
)
|
||||||
|
|
||||||
thread_message: OpenAIMessage = openai_client.beta.threads.messages.create( # type: ignore
|
thread_message: OpenAIMessage = openai_client.beta.threads.messages.create( # type: ignore
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import enum
|
||||||
from typing import Any, List, Optional, Tuple, cast
|
from typing import Any, List, Optional, Tuple, cast
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
@ -19,6 +20,10 @@ from litellm.types.utils import ModelResponse, ProviderField
|
||||||
from litellm.utils import _add_path_to_api_base, supports_tool_choice
|
from litellm.utils import _add_path_to_api_base, supports_tool_choice
|
||||||
|
|
||||||
|
|
||||||
|
class AzureFoundryErrorStrings(str, enum.Enum):
|
||||||
|
SET_EXTRA_PARAMETERS_TO_PASS_THROUGH = "Set extra-parameters to 'pass-through'"
|
||||||
|
|
||||||
|
|
||||||
class AzureAIStudioConfig(OpenAIConfig):
|
class AzureAIStudioConfig(OpenAIConfig):
|
||||||
def get_supported_openai_params(self, model: str) -> List:
|
def get_supported_openai_params(self, model: str) -> List:
|
||||||
model_supports_tool_choice = True # azure ai supports this by default
|
model_supports_tool_choice = True # azure ai supports this by default
|
||||||
|
@ -240,12 +245,18 @@ class AzureAIStudioConfig(OpenAIConfig):
|
||||||
) -> bool:
|
) -> bool:
|
||||||
should_drop_params = litellm_params.get("drop_params") or litellm.drop_params
|
should_drop_params = litellm_params.get("drop_params") or litellm.drop_params
|
||||||
error_text = e.response.text
|
error_text = e.response.text
|
||||||
|
|
||||||
if should_drop_params and "Extra inputs are not permitted" in error_text:
|
if should_drop_params and "Extra inputs are not permitted" in error_text:
|
||||||
return True
|
return True
|
||||||
elif (
|
elif (
|
||||||
"unknown field: parameter index is not a valid field" in error_text
|
"unknown field: parameter index is not a valid field" in error_text
|
||||||
): # remove index from tool calls
|
): # remove index from tool calls
|
||||||
return True
|
return True
|
||||||
|
elif (
|
||||||
|
AzureFoundryErrorStrings.SET_EXTRA_PARAMETERS_TO_PASS_THROUGH.value
|
||||||
|
in error_text
|
||||||
|
): # remove extra-parameters from tool calls
|
||||||
|
return True
|
||||||
return super().should_retry_llm_api_inside_llm_translation_on_http_error(
|
return super().should_retry_llm_api_inside_llm_translation_on_http_error(
|
||||||
e=e, litellm_params=litellm_params
|
e=e, litellm_params=litellm_params
|
||||||
)
|
)
|
||||||
|
@ -265,5 +276,46 @@ class AzureAIStudioConfig(OpenAIConfig):
|
||||||
litellm.remove_index_from_tool_calls(
|
litellm.remove_index_from_tool_calls(
|
||||||
messages=_messages,
|
messages=_messages,
|
||||||
)
|
)
|
||||||
|
elif (
|
||||||
|
AzureFoundryErrorStrings.SET_EXTRA_PARAMETERS_TO_PASS_THROUGH.value
|
||||||
|
in e.response.text
|
||||||
|
):
|
||||||
|
request_data = self._drop_extra_params_from_request_data(
|
||||||
|
request_data, e.response.text
|
||||||
|
)
|
||||||
data = drop_params_from_unprocessable_entity_error(e=e, data=request_data)
|
data = drop_params_from_unprocessable_entity_error(e=e, data=request_data)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
def _drop_extra_params_from_request_data(
|
||||||
|
self, request_data: dict, error_text: str
|
||||||
|
) -> dict:
|
||||||
|
params_to_drop = self._extract_params_to_drop_from_error_text(error_text)
|
||||||
|
if params_to_drop:
|
||||||
|
for param in params_to_drop:
|
||||||
|
if param in request_data:
|
||||||
|
request_data.pop(param, None)
|
||||||
|
return request_data
|
||||||
|
|
||||||
|
def _extract_params_to_drop_from_error_text(
|
||||||
|
self, error_text: str
|
||||||
|
) -> Optional[List[str]]:
|
||||||
|
"""
|
||||||
|
Error text looks like this:
|
||||||
|
"Extra parameters ['stream_options', 'extra-parameters'] are not allowed when extra-parameters is not set or set to be 'error'.
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Extract parameters within square brackets
|
||||||
|
match = re.search(r"\[(.*?)\]", error_text)
|
||||||
|
if not match:
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Parse the extracted string into a list of parameter names
|
||||||
|
params_str = match.group(1)
|
||||||
|
params = []
|
||||||
|
for param in params_str.split(","):
|
||||||
|
# Clean up the parameter name (remove quotes, spaces)
|
||||||
|
clean_param = param.strip().strip("'").strip('"')
|
||||||
|
if clean_param:
|
||||||
|
params.append(clean_param)
|
||||||
|
return params
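A quick check of the extraction logic against the sample error text from the docstring (illustrative; it assumes `AzureAIStudioConfig` can be instantiated with no arguments):

```python
# Illustrative: parse the bracketed parameter list out of the Azure Foundry error text.
cfg = AzureAIStudioConfig()
error_text = (
    "Extra parameters ['stream_options', 'extra-parameters'] are not allowed "
    "when extra-parameters is not set or set to be 'error'."
)
assert cfg._extract_params_to_drop_from_error_text(error_text) == [
    "stream_options",
    "extra-parameters",
]
```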
|
||||||
|
|
|
@ -22,6 +22,7 @@ from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMExcepti
|
||||||
from litellm.types.llms.bedrock import *
|
from litellm.types.llms.bedrock import *
|
||||||
from litellm.types.llms.openai import (
|
from litellm.types.llms.openai import (
|
||||||
AllMessageValues,
|
AllMessageValues,
|
||||||
|
ChatCompletionRedactedThinkingBlock,
|
||||||
ChatCompletionResponseMessage,
|
ChatCompletionResponseMessage,
|
||||||
ChatCompletionSystemMessage,
|
ChatCompletionSystemMessage,
|
||||||
ChatCompletionThinkingBlock,
|
ChatCompletionThinkingBlock,
|
||||||
|
@ -375,25 +376,27 @@ class AmazonConverseConfig(BaseConfig):
|
||||||
system_content_blocks: List[SystemContentBlock] = []
|
system_content_blocks: List[SystemContentBlock] = []
|
||||||
for idx, message in enumerate(messages):
|
for idx, message in enumerate(messages):
|
||||||
if message["role"] == "system":
|
if message["role"] == "system":
|
||||||
_system_content_block: Optional[SystemContentBlock] = None
|
system_prompt_indices.append(idx)
|
||||||
_cache_point_block: Optional[SystemContentBlock] = None
|
if isinstance(message["content"], str) and message["content"]:
|
||||||
if isinstance(message["content"], str) and len(message["content"]) > 0:
|
system_content_blocks.append(
|
||||||
_system_content_block = SystemContentBlock(text=message["content"])
|
SystemContentBlock(text=message["content"])
|
||||||
_cache_point_block = self._get_cache_point_block(
|
)
|
||||||
|
cache_block = self._get_cache_point_block(
|
||||||
message, block_type="system"
|
message, block_type="system"
|
||||||
)
|
)
|
||||||
|
if cache_block:
|
||||||
|
system_content_blocks.append(cache_block)
|
||||||
elif isinstance(message["content"], list):
|
elif isinstance(message["content"], list):
|
||||||
for m in message["content"]:
|
for m in message["content"]:
|
||||||
if m.get("type", "") == "text" and len(m["text"]) > 0:
|
if m.get("type") == "text" and m.get("text"):
|
||||||
_system_content_block = SystemContentBlock(text=m["text"])
|
system_content_blocks.append(
|
||||||
_cache_point_block = self._get_cache_point_block(
|
SystemContentBlock(text=m["text"])
|
||||||
|
)
|
||||||
|
cache_block = self._get_cache_point_block(
|
||||||
m, block_type="system"
|
m, block_type="system"
|
||||||
)
|
)
|
||||||
if _system_content_block is not None:
|
if cache_block:
|
||||||
system_content_blocks.append(_system_content_block)
|
system_content_blocks.append(cache_block)
|
||||||
if _cache_point_block is not None:
|
|
||||||
system_content_blocks.append(_cache_point_block)
|
|
||||||
system_prompt_indices.append(idx)
|
|
||||||
if len(system_prompt_indices) > 0:
|
if len(system_prompt_indices) > 0:
|
||||||
for idx in reversed(system_prompt_indices):
|
for idx in reversed(system_prompt_indices):
|
||||||
messages.pop(idx)
|
messages.pop(idx)
|
||||||
|
@@ -627,9 +630,11 @@ class AmazonConverseConfig(BaseConfig):

     def _transform_thinking_blocks(
         self, thinking_blocks: List[BedrockConverseReasoningContentBlock]
-    ) -> List[ChatCompletionThinkingBlock]:
+    ) -> List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]:
         """Return a consistent format for thinking blocks between Anthropic and Bedrock."""
-        thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
+        thinking_blocks_list: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
         for block in thinking_blocks:
             if "reasoningText" in block:
                 _thinking_block = ChatCompletionThinkingBlock(type="thinking")
@@ -640,6 +645,11 @@ class AmazonConverseConfig(BaseConfig):
                 if _signature is not None:
                     _thinking_block["signature"] = _signature
                 thinking_blocks_list.append(_thinking_block)
+            elif "redactedContent" in block:
+                _redacted_block = ChatCompletionRedactedThinkingBlock(
+                    type="redacted_thinking", data=block["redactedContent"]
+                )
+                thinking_blocks_list.append(_redacted_block)
         return thinking_blocks_list

     def _transform_usage(self, usage: ConverseTokenUsageBlock) -> Usage:
@@ -50,6 +50,7 @@ from litellm.llms.custom_httpx.http_handler import (
 )
 from litellm.types.llms.bedrock import *
 from litellm.types.llms.openai import (
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,
     ChatCompletionToolCallFunctionChunk,
@@ -1255,19 +1256,33 @@ class AWSEventStreamDecoder:

     def translate_thinking_blocks(
         self, thinking_block: BedrockConverseReasoningContentBlockDelta
-    ) -> Optional[List[ChatCompletionThinkingBlock]]:
+    ) -> Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ]:
         """
         Translate the thinking blocks to a string
         """

-        thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
-        _thinking_block = ChatCompletionThinkingBlock(type="thinking")
+        thinking_blocks_list: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
+        _thinking_block: Optional[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = None
+
         if "text" in thinking_block:
+            _thinking_block = ChatCompletionThinkingBlock(type="thinking")
             _thinking_block["thinking"] = thinking_block["text"]
         elif "signature" in thinking_block:
+            _thinking_block = ChatCompletionThinkingBlock(type="thinking")
             _thinking_block["signature"] = thinking_block["signature"]
             _thinking_block["thinking"] = ""  # consistent with anthropic response
-        thinking_blocks_list.append(_thinking_block)
+        elif "redactedContent" in thinking_block:
+            _thinking_block = ChatCompletionRedactedThinkingBlock(
+                type="redacted_thinking", data=thinking_block["redactedContent"]
+            )
+        if _thinking_block is not None:
+            thinking_blocks_list.append(_thinking_block)
         return thinking_blocks_list

     def converse_chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
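
For readers following the change above, here is a minimal standalone sketch (not the litellm implementation) of how a Bedrock `reasoningContent` delta maps onto OpenAI-style thinking blocks, including the new redacted case. Plain dicts stand in for the typed blocks used in the diff.

```python
from typing import Dict, List, Optional, Union

Block = Dict[str, Union[str, bytes]]


def translate_thinking_block(delta: Dict) -> List[Block]:
    """Mirror of the logic above: text, signature-only, and redacted deltas."""
    blocks: List[Block] = []
    block: Optional[Block] = None

    if "text" in delta:
        block = {"type": "thinking", "thinking": delta["text"]}
    elif "signature" in delta:
        # signature-only delta: keep "thinking" empty, consistent with Anthropic responses
        block = {"type": "thinking", "signature": delta["signature"], "thinking": ""}
    elif "redactedContent" in delta:
        # redacted thinking is opaque data and is passed through unchanged
        block = {"type": "redacted_thinking", "data": delta["redactedContent"]}

    if block is not None:
        blocks.append(block)
    return blocks


print(translate_thinking_block({"text": "step 1 ..."}))
print(translate_thinking_block({"redactedContent": b"opaque-bytes"}))
```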
@@ -1279,31 +1294,44 @@ class AWSEventStreamDecoder:
         usage: Optional[Usage] = None
         provider_specific_fields: dict = {}
         reasoning_content: Optional[str] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[
+                    ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                ]
+            ]
+        ] = None

         index = int(chunk_data.get("contentBlockIndex", 0))
         if "start" in chunk_data:
             start_obj = ContentBlockStartEvent(**chunk_data["start"])
             self.content_blocks = []  # reset
-            if (
-                start_obj is not None
-                and "toolUse" in start_obj
-                and start_obj["toolUse"] is not None
-            ):
-                ## check tool name was formatted by litellm
-                _response_tool_name = start_obj["toolUse"]["name"]
-                response_tool_name = get_bedrock_tool_name(
-                    response_tool_name=_response_tool_name
-                )
-                tool_use = {
-                    "id": start_obj["toolUse"]["toolUseId"],
-                    "type": "function",
-                    "function": {
-                        "name": response_tool_name,
-                        "arguments": "",
-                    },
-                    "index": index,
-                }
+            if start_obj is not None:
+                if "toolUse" in start_obj and start_obj["toolUse"] is not None:
+                    ## check tool name was formatted by litellm
+                    _response_tool_name = start_obj["toolUse"]["name"]
+                    response_tool_name = get_bedrock_tool_name(
+                        response_tool_name=_response_tool_name
+                    )
+                    tool_use = {
+                        "id": start_obj["toolUse"]["toolUseId"],
+                        "type": "function",
+                        "function": {
+                            "name": response_tool_name,
+                            "arguments": "",
+                        },
+                        "index": index,
+                    }
+                elif (
+                    "reasoningContent" in start_obj
+                    and start_obj["reasoningContent"] is not None
+                ):  # redacted thinking can be in start object
+                    thinking_blocks = self.translate_thinking_blocks(
+                        start_obj["reasoningContent"]
+                    )
+                    provider_specific_fields = {
+                        "reasoningContent": start_obj["reasoningContent"],
+                    }
         elif "delta" in chunk_data:
             delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
             self.content_blocks.append(delta_obj)
@@ -229,13 +229,17 @@ class BaseLLMHTTPHandler:
         api_key: Optional[str] = None,
         headers: Optional[dict] = {},
         client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        provider_config: Optional[BaseConfig] = None,
     ):
         json_mode: bool = optional_params.pop("json_mode", False)
         extra_body: Optional[dict] = optional_params.pop("extra_body", None)
        fake_stream = fake_stream or optional_params.pop("fake_stream", False)

-        provider_config = ProviderConfigManager.get_provider_chat_config(
-            model=model, provider=litellm.LlmProviders(custom_llm_provider)
+        provider_config = (
+            provider_config
+            or ProviderConfigManager.get_provider_chat_config(
+                model=model, provider=litellm.LlmProviders(custom_llm_provider)
+            )
         )
         if provider_config is None:
             raise ValueError(
@@ -37,6 +37,7 @@ from litellm.types.llms.databricks import (
 )
 from litellm.types.llms.openai import (
     AllMessageValues,
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionThinkingBlock,
     ChatCompletionToolChoiceFunctionParam,
     ChatCompletionToolChoiceObjectParam,
@@ -314,13 +315,24 @@ class DatabricksConfig(DatabricksBase, OpenAILikeChatConfig, AnthropicConfig):
     @staticmethod
     def extract_reasoning_content(
         content: Optional[AllDatabricksContentValues],
-    ) -> Tuple[Optional[str], Optional[List[ChatCompletionThinkingBlock]]]:
+    ) -> Tuple[
+        Optional[str],
+        Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ],
+    ]:
         """
         Extract and return the reasoning content and thinking blocks
         """
         if content is None:
             return None, None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None
         reasoning_content: Optional[str] = None
         if isinstance(content, list):
             for item in content:
@@ -1,15 +1,33 @@
-from typing import List, Literal, Optional, Tuple, Union, cast
+import json
+import uuid
+from typing import Any, List, Literal, Optional, Tuple, Union, cast
+
+import httpx

 import litellm
+from litellm.constants import RESPONSE_FORMAT_TOOL_NAME
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.litellm_core_utils.llm_response_utils.get_headers import (
+    get_response_headers,
+)
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import (
     AllMessageValues,
     ChatCompletionImageObject,
+    ChatCompletionToolParam,
     OpenAIChatCompletionToolParam,
 )
-from litellm.types.utils import ProviderSpecificModelInfo
+from litellm.types.utils import (
+    ChatCompletionMessageToolCall,
+    Choices,
+    Function,
+    Message,
+    ModelResponse,
+    ProviderSpecificModelInfo,
+)

 from ...openai.chat.gpt_transformation import OpenAIGPTConfig
+from ..common_utils import FireworksAIException


 class FireworksAIConfig(OpenAIGPTConfig):
@@ -219,6 +237,94 @@ class FireworksAIConfig(OpenAIGPTConfig):
             headers=headers,
         )

+    def _handle_message_content_with_tool_calls(
+        self,
+        message: Message,
+        tool_calls: Optional[List[ChatCompletionToolParam]],
+    ) -> Message:
+        """
+        Fireworks AI sends tool calls in the content field instead of tool_calls
+
+        Relevant Issue: https://github.com/BerriAI/litellm/issues/7209#issuecomment-2813208780
+        """
+        if (
+            tool_calls is not None
+            and message.content is not None
+            and message.tool_calls is None
+        ):
+            try:
+                function = Function(**json.loads(message.content))
+                if function.name != RESPONSE_FORMAT_TOOL_NAME and function.name in [
+                    tool["function"]["name"] for tool in tool_calls
+                ]:
+                    tool_call = ChatCompletionMessageToolCall(
+                        function=function, id=str(uuid.uuid4()), type="function"
+                    )
+                    message.tool_calls = [tool_call]
+
+                    message.content = None
+            except Exception:
+                pass
+
+        return message
+
+    def transform_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        model_response: ModelResponse,
+        logging_obj: LiteLLMLoggingObj,
+        request_data: dict,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        litellm_params: dict,
+        encoding: Any,
+        api_key: Optional[str] = None,
+        json_mode: Optional[bool] = None,
+    ) -> ModelResponse:
+        ## LOGGING
+        logging_obj.post_call(
+            input=messages,
+            api_key=api_key,
+            original_response=raw_response.text,
+            additional_args={"complete_input_dict": request_data},
+        )
+
+        ## RESPONSE OBJECT
+        try:
+            completion_response = raw_response.json()
+        except Exception as e:
+            response_headers = getattr(raw_response, "headers", None)
+            raise FireworksAIException(
+                message="Unable to get json response - {}, Original Response: {}".format(
+                    str(e), raw_response.text
+                ),
+                status_code=raw_response.status_code,
+                headers=response_headers,
+            )
+
+        raw_response_headers = dict(raw_response.headers)
+
+        additional_headers = get_response_headers(raw_response_headers)
+
+        response = ModelResponse(**completion_response)
+
+        if response.model is not None:
+            response.model = "fireworks_ai/" + response.model
+
+        ## FIREWORKS AI sends tool calls in the content field instead of tool_calls
+        for choice in response.choices:
+            cast(
+                Choices, choice
+            ).message = self._handle_message_content_with_tool_calls(
+                message=cast(Choices, choice).message,
+                tool_calls=optional_params.get("tools", None),
+            )
+
+        response._hidden_params = {"additional_headers": additional_headers}
+
+        return response
+
     def _get_openai_compatible_provider_info(
         self, api_base: Optional[str], api_key: Optional[str]
     ) -> Tuple[Optional[str], Optional[str]]:
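
A minimal standalone sketch of the repair added above: when Fireworks AI returns the tool call as a JSON string in `content` instead of `tool_calls`, it is promoted to a real tool call when its name matches a requested tool. Plain dicts stand in for litellm's Message/Function types; values here are illustrative only.

```python
import json
import uuid


def promote_tool_call(message: dict, requested_tools: list) -> dict:
    """If the assistant message carries a JSON-encoded function call in content, lift it into tool_calls."""
    if message.get("tool_calls") or not message.get("content"):
        return message
    try:
        function = json.loads(message["content"])
    except (json.JSONDecodeError, TypeError):
        return message
    allowed = {t["function"]["name"] for t in requested_tools}
    if function.get("name") in allowed:
        message["tool_calls"] = [
            {"id": str(uuid.uuid4()), "type": "function", "function": function}
        ]
        message["content"] = None
    return message


msg = {"role": "assistant", "content": '{"name": "get_weather", "arguments": "{\\"city\\": \\"SF\\"}"}'}
tools = [{"type": "function", "function": {"name": "get_weather"}}]
print(promote_tool_call(msg, tools))
```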
@@ -7,6 +7,7 @@ from litellm.litellm_core_utils.prompt_templates.factory import (
 )
 from litellm.types.llms.openai import AllMessageValues
 from litellm.types.llms.vertex_ai import ContentType, PartType
+from litellm.utils import supports_reasoning

 from ...vertex_ai.gemini.transformation import _gemini_convert_messages_with_history
 from ...vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexGeminiConfig
@@ -67,7 +68,7 @@ class GoogleAIStudioGeminiConfig(VertexGeminiConfig):
         return super().get_config()

     def get_supported_openai_params(self, model: str) -> List[str]:
-        return [
+        supported_params = [
             "temperature",
             "top_p",
             "max_tokens",
@@ -83,6 +84,10 @@ class GoogleAIStudioGeminiConfig(VertexGeminiConfig):
             "frequency_penalty",
             "modalities",
         ]
+        if supports_reasoning(model):
+            supported_params.append("reasoning_effort")
+            supported_params.append("thinking")
+        return supported_params

     def map_openai_params(
         self,
@@ -2,9 +2,19 @@
 Translate from OpenAI's `/v1/chat/completions` to VLLM's `/v1/chat/completions`
 """

-from typing import Optional, Tuple
+from typing import List, Optional, Tuple, cast

+from litellm.litellm_core_utils.prompt_templates.common_utils import (
+    _get_image_mime_type_from_url,
+)
+from litellm.litellm_core_utils.prompt_templates.factory import _parse_mime_type
 from litellm.secret_managers.main import get_secret_str
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    ChatCompletionFileObject,
+    ChatCompletionVideoObject,
+    ChatCompletionVideoUrlObject,
+)

 from ....utils import _remove_additional_properties, _remove_strict_from_schema
 from ...openai.chat.gpt_transformation import OpenAIGPTConfig
@@ -38,3 +48,71 @@ class HostedVLLMChatConfig(OpenAIGPTConfig):
             api_key or get_secret_str("HOSTED_VLLM_API_KEY") or "fake-api-key"
         )  # vllm does not require an api key
         return api_base, dynamic_api_key
+
+    def _is_video_file(self, content_item: ChatCompletionFileObject) -> bool:
+        """
+        Check if the file is a video
+
+        - format: video/<extension>
+        - file_data: base64 encoded video data
+        - file_id: infer mp4 from extension
+        """
+        file = content_item.get("file", {})
+        format = file.get("format")
+        file_data = file.get("file_data")
+        file_id = file.get("file_id")
+        if content_item.get("type") != "file":
+            return False
+        if format and format.startswith("video/"):
+            return True
+        elif file_data:
+            mime_type = _parse_mime_type(file_data)
+            if mime_type and mime_type.startswith("video/"):
+                return True
+        elif file_id:
+            mime_type = _get_image_mime_type_from_url(file_id)
+            if mime_type and mime_type.startswith("video/"):
+                return True
+        return False
+
+    def _convert_file_to_video_url(
+        self, content_item: ChatCompletionFileObject
+    ) -> ChatCompletionVideoObject:
+        file = content_item.get("file", {})
+        file_id = file.get("file_id")
+        file_data = file.get("file_data")
+
+        if file_id:
+            return ChatCompletionVideoObject(
+                type="video_url", video_url=ChatCompletionVideoUrlObject(url=file_id)
+            )
+        elif file_data:
+            return ChatCompletionVideoObject(
+                type="video_url", video_url=ChatCompletionVideoUrlObject(url=file_data)
+            )
+        raise ValueError("file_id or file_data is required")
+
+    def _transform_messages(
+        self, messages: List[AllMessageValues], model: str
+    ) -> List[AllMessageValues]:
+        """
+        Support translating video files from file_id or file_data to video_url
+        """
+        for message in messages:
+            if message["role"] == "user":
+                message_content = message.get("content")
+                if message_content and isinstance(message_content, list):
+                    replaced_content_items: List[
+                        Tuple[int, ChatCompletionFileObject]
+                    ] = []
+                    for idx, content_item in enumerate(message_content):
+                        if content_item.get("type") == "file":
+                            content_item = cast(ChatCompletionFileObject, content_item)
+                            if self._is_video_file(content_item):
+                                replaced_content_items.append((idx, content_item))
+                    for idx, content_item in replaced_content_items:
+                        message_content[idx] = self._convert_file_to_video_url(
+                            content_item
+                        )
+        transformed_messages = super()._transform_messages(messages, model)
+        return transformed_messages
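
A minimal standalone sketch of the file-to-video_url rewrite added above, using plain dicts in place of litellm's ChatCompletionFileObject / ChatCompletionVideoObject types; the URL and message content are illustrative only.

```python
def convert_file_item_to_video_url(item: dict) -> dict:
    """Rewrite a {"type": "file"} content item into a {"type": "video_url"} item."""
    file = item.get("file", {})
    url = file.get("file_id") or file.get("file_data")
    if not url:
        raise ValueError("file_id or file_data is required")
    return {"type": "video_url", "video_url": {"url": url}}


user_content = [
    {"type": "text", "text": "Describe this clip"},
    {"type": "file", "file": {"file_id": "https://example.com/clip.mp4", "format": "video/mp4"}},
]
user_content = [
    convert_file_item_to_video_url(c) if c.get("type") == "file" else c
    for c in user_content
]
print(user_content)
```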
@@ -13,6 +13,7 @@ class LiteLLMProxyChatConfig(OpenAIGPTConfig):
     def get_supported_openai_params(self, model: str) -> List:
         list = super().get_supported_openai_params(model)
         list.append("thinking")
+        list.append("reasoning_effort")
         return list

     def _map_openai_params(
@@ -201,8 +201,6 @@ class TritonGenerateConfig(TritonConfig):
                 "max_tokens": int(
                     optional_params.get("max_tokens", DEFAULT_MAX_TOKENS_FOR_TRITON)
                 ),
-                "bad_words": [""],
-                "stop_words": [""],
             },
             "stream": bool(stream),
         }
@@ -12,6 +12,9 @@ from pydantic import BaseModel

 import litellm
 from litellm._logging import verbose_logger
+from litellm.litellm_core_utils.prompt_templates.common_utils import (
+    _get_image_mime_type_from_url,
+)
 from litellm.litellm_core_utils.prompt_templates.factory import (
     convert_to_anthropic_image_obj,
     convert_to_gemini_tool_call_invoke,
@@ -99,62 +102,6 @@ def _process_gemini_image(image_url: str, format: Optional[str] = None) -> PartT
         raise e


-def _get_image_mime_type_from_url(url: str) -> Optional[str]:
-    """
-    Get mime type for common image URLs
-    See gemini mime types: https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements
-
-    Supported by Gemini:
-     application/pdf
-     audio/mpeg
-     audio/mp3
-     audio/wav
-     image/png
-     image/jpeg
-     image/webp
-     text/plain
-     video/mov
-     video/mpeg
-     video/mp4
-     video/mpg
-     video/avi
-     video/wmv
-     video/mpegps
-     video/flv
-    """
-    url = url.lower()
-
-    # Map file extensions to mime types
-    mime_types = {
-        # Images
-        (".jpg", ".jpeg"): "image/jpeg",
-        (".png",): "image/png",
-        (".webp",): "image/webp",
-        # Videos
-        (".mp4",): "video/mp4",
-        (".mov",): "video/mov",
-        (".mpeg", ".mpg"): "video/mpeg",
-        (".avi",): "video/avi",
-        (".wmv",): "video/wmv",
-        (".mpegps",): "video/mpegps",
-        (".flv",): "video/flv",
-        # Audio
-        (".mp3",): "audio/mp3",
-        (".wav",): "audio/wav",
-        (".mpeg",): "audio/mpeg",
-        # Documents
-        (".pdf",): "application/pdf",
-        (".txt",): "text/plain",
-    }
-
-    # Check each extension group against the URL
-    for extensions, mime_type in mime_types.items():
-        if any(url.endswith(ext) for ext in extensions):
-            return mime_type
-
-    return None
-
-
 def _gemini_convert_messages_with_history(  # noqa: PLR0915
     messages: List[AllMessageValues],
 ) -> List[ContentType]:
@@ -269,6 +216,11 @@ def _gemini_convert_messages_with_history(  # noqa: PLR0915
                 msg_dict = messages[msg_i]  # type: ignore
                 assistant_msg = ChatCompletionAssistantMessage(**msg_dict)  # type: ignore
                 _message_content = assistant_msg.get("content", None)
+                reasoning_content = assistant_msg.get("reasoning_content", None)
+                if reasoning_content is not None:
+                    assistant_content.append(
+                        PartType(thought=True, text=reasoning_content)
+                    )
                 if _message_content is not None and isinstance(_message_content, list):
                     _parts = []
                     for element in _message_content:
@@ -276,6 +228,7 @@ def _gemini_convert_messages_with_history(  # noqa: PLR0915
                         if element["type"] == "text":
                             _part = PartType(text=element["text"])
                             _parts.append(_part)
+
                     assistant_content.extend(_parts)
                 elif (
                     _message_content is not None
@@ -24,6 +24,11 @@ import litellm
 import litellm.litellm_core_utils
 import litellm.litellm_core_utils.litellm_logging
 from litellm import verbose_logger
+from litellm.constants import (
+    DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET,
+    DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
+    DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET,
+)
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
 from litellm.llms.custom_httpx.http_handler import (
@@ -31,6 +36,7 @@ from litellm.llms.custom_httpx.http_handler import (
     HTTPHandler,
     get_async_httpx_client,
 )
+from litellm.types.llms.anthropic import AnthropicThinkingParam
 from litellm.types.llms.openai import (
     AllMessageValues,
     ChatCompletionResponseMessage,
@@ -45,6 +51,7 @@ from litellm.types.llms.vertex_ai import (
     ContentType,
     FunctionCallingConfig,
     FunctionDeclaration,
+    GeminiThinkingConfig,
     GenerateContentResponseBody,
     HttpxPartType,
     LogprobsResult,
@@ -59,7 +66,7 @@ from litellm.types.utils import (
     TopLogprob,
     Usage,
 )
-from litellm.utils import CustomStreamWrapper, ModelResponse
+from litellm.utils import CustomStreamWrapper, ModelResponse, supports_reasoning

 from ....utils import _remove_additional_properties, _remove_strict_from_schema
 from ..common_utils import VertexAIError, _build_vertex_schema
@@ -190,7 +197,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
         return super().get_config()

     def get_supported_openai_params(self, model: str) -> List[str]:
-        return [
+        supported_params = [
             "temperature",
             "top_p",
             "max_tokens",
@@ -210,6 +217,10 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
             "top_logprobs",
             "modalities",
         ]
+        if supports_reasoning(model):
+            supported_params.append("reasoning_effort")
+            supported_params.append("thinking")
+        return supported_params

     def map_tool_choice_values(
         self, model: str, tool_choice: Union[str, dict]
@@ -313,10 +324,14 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
         if isinstance(old_schema, list):
             for item in old_schema:
                 if isinstance(item, dict):
-                    item = _build_vertex_schema(parameters=item, add_property_ordering=True)
+                    item = _build_vertex_schema(
+                        parameters=item, add_property_ordering=True
+                    )

         elif isinstance(old_schema, dict):
-            old_schema = _build_vertex_schema(parameters=old_schema, add_property_ordering=True)
+            old_schema = _build_vertex_schema(
+                parameters=old_schema, add_property_ordering=True
+            )
         return old_schema

     def apply_response_schema_transformation(self, value: dict, optional_params: dict):
@@ -343,6 +358,43 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
                 value=optional_params["response_schema"]
             )

+    @staticmethod
+    def _map_reasoning_effort_to_thinking_budget(
+        reasoning_effort: str,
+    ) -> GeminiThinkingConfig:
+        if reasoning_effort == "low":
+            return {
+                "thinkingBudget": DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
+                "includeThoughts": True,
+            }
+        elif reasoning_effort == "medium":
+            return {
+                "thinkingBudget": DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET,
+                "includeThoughts": True,
+            }
+        elif reasoning_effort == "high":
+            return {
+                "thinkingBudget": DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET,
+                "includeThoughts": True,
+            }
+        else:
+            raise ValueError(f"Invalid reasoning effort: {reasoning_effort}")
+
+    @staticmethod
+    def _map_thinking_param(
+        thinking_param: AnthropicThinkingParam,
+    ) -> GeminiThinkingConfig:
+        thinking_enabled = thinking_param.get("type") == "enabled"
+        thinking_budget = thinking_param.get("budget_tokens")
+
+        params: GeminiThinkingConfig = {}
+        if thinking_enabled:
+            params["includeThoughts"] = True
+        if thinking_budget:
+            params["thinkingBudget"] = thinking_budget
+
+        return params
+
     def map_openai_params(
         self,
         non_default_params: Dict,
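
A minimal standalone sketch of the two mappings added above: OpenAI-style `reasoning_effort` and Anthropic-style `thinking` both collapse into a Gemini `thinkingConfig`. The budget numbers below are placeholders for the `DEFAULT_REASONING_EFFORT_*_THINKING_BUDGET` constants and are assumptions here, not the values litellm ships.

```python
BUDGETS = {"low": 1024, "medium": 2048, "high": 4096}  # assumed placeholder values


def map_reasoning_effort(effort: str) -> dict:
    """reasoning_effort -> Gemini thinkingConfig."""
    if effort not in BUDGETS:
        raise ValueError(f"Invalid reasoning effort: {effort}")
    return {"thinkingBudget": BUDGETS[effort], "includeThoughts": True}


def map_thinking_param(thinking: dict) -> dict:
    """Anthropic-style {"type": "enabled", "budget_tokens": N} -> Gemini thinkingConfig."""
    config: dict = {}
    if thinking.get("type") == "enabled":
        config["includeThoughts"] = True
    if thinking.get("budget_tokens"):
        config["thinkingBudget"] = thinking["budget_tokens"]
    return config


print(map_reasoning_effort("low"))
print(map_thinking_param({"type": "enabled", "budget_tokens": 2048}))
```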
@@ -399,6 +451,16 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
                 optional_params["tool_choice"] = _tool_choice_value
             elif param == "seed":
                 optional_params["seed"] = value
+            elif param == "reasoning_effort" and isinstance(value, str):
+                optional_params[
+                    "thinkingConfig"
+                ] = VertexGeminiConfig._map_reasoning_effort_to_thinking_budget(value)
+            elif param == "thinking":
+                optional_params[
+                    "thinkingConfig"
+                ] = VertexGeminiConfig._map_thinking_param(
+                    cast(AnthropicThinkingParam, value)
+                )
             elif param == "modalities" and isinstance(value, list):
                 response_modalities = []
                 for modality in value:
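
A hedged usage sketch (assumes this branch of litellm and a Gemini API key in the environment; the model name is illustrative): with the parameter mapping above, an OpenAI-style `reasoning_effort` argument passed to `litellm.completion` is translated into Gemini's `thinkingConfig`.

```python
import litellm

resp = litellm.completion(
    model="gemini/gemini-2.5-flash-preview-04-17",  # illustrative model name
    messages=[{"role": "user", "content": "What is 17 * 23?"}],
    reasoning_effort="low",  # mapped to {"thinkingBudget": ..., "includeThoughts": True}
)
print(resp.choices[0].message)
```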
@@ -514,19 +576,28 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):

     def get_assistant_content_message(
         self, parts: List[HttpxPartType]
-    ) -> Optional[str]:
-        _content_str = ""
+    ) -> Tuple[Optional[str], Optional[str]]:
+        content_str: Optional[str] = None
+        reasoning_content_str: Optional[str] = None
         for part in parts:
+            _content_str = ""
             if "text" in part:
                 _content_str += part["text"]
             elif "inlineData" in part:  # base64 encoded image
                 _content_str += "data:{};base64,{}".format(
                     part["inlineData"]["mimeType"], part["inlineData"]["data"]
                 )
+            if len(_content_str) > 0:
+                if part.get("thought") is True:
+                    if reasoning_content_str is None:
+                        reasoning_content_str = ""
+                    reasoning_content_str += _content_str
+                else:
+                    if content_str is None:
+                        content_str = ""
+                    content_str += _content_str

-        if _content_str:
-            return _content_str
-        return None
+        return content_str, reasoning_content_str

     def _transform_parts(
         self,
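
A minimal standalone sketch of the part-splitting logic above: Gemini text parts flagged with `thought: true` are accumulated into reasoning content, everything else into regular content. Plain dicts are used here in place of the Gemini `HttpxPartType`.

```python
from typing import Optional, Tuple


def split_parts(parts: list) -> Tuple[Optional[str], Optional[str]]:
    content: Optional[str] = None
    reasoning: Optional[str] = None
    for part in parts:
        if "text" in part:
            text = part["text"]
        elif "inlineData" in part:  # base64-encoded media becomes a data URI
            text = "data:{};base64,{}".format(
                part["inlineData"]["mimeType"], part["inlineData"]["data"]
            )
        else:
            continue
        if not text:
            continue
        if part.get("thought") is True:
            reasoning = (reasoning or "") + text
        else:
            content = (content or "") + text
    return content, reasoning


print(split_parts([{"text": "Let me check...", "thought": True}, {"text": "42"}]))
```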
@@ -677,6 +748,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
         audio_tokens: Optional[int] = None
         text_tokens: Optional[int] = None
         prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None
+        reasoning_tokens: Optional[int] = None
         if "cachedContentTokenCount" in completion_response["usageMetadata"]:
             cached_tokens = completion_response["usageMetadata"][
                 "cachedContentTokenCount"
@@ -687,7 +759,10 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
                 audio_tokens = detail["tokenCount"]
             elif detail["modality"] == "TEXT":
                 text_tokens = detail["tokenCount"]
-
+        if "thoughtsTokenCount" in completion_response["usageMetadata"]:
+            reasoning_tokens = completion_response["usageMetadata"][
+                "thoughtsTokenCount"
+            ]
         prompt_tokens_details = PromptTokensDetailsWrapper(
             cached_tokens=cached_tokens,
             audio_tokens=audio_tokens,
@@ -703,6 +778,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
             ),
             total_tokens=completion_response["usageMetadata"].get("totalTokenCount", 0),
             prompt_tokens_details=prompt_tokens_details,
+            reasoning_tokens=reasoning_tokens,
         )

         return usage
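
A minimal standalone sketch of the usage accounting above: Gemini reports reasoning tokens as `usageMetadata.thoughtsTokenCount`, which is surfaced as `reasoning_tokens` alongside the regular counts. A plain dict stands in for litellm's Usage object; the `usageMetadata` field names follow Gemini's response format.

```python
def build_usage(usage_metadata: dict) -> dict:
    """Collect prompt/completion/total counts plus cached and reasoning tokens."""
    return {
        "prompt_tokens": usage_metadata.get("promptTokenCount", 0),
        "completion_tokens": usage_metadata.get("candidatesTokenCount", 0),
        "total_tokens": usage_metadata.get("totalTokenCount", 0),
        "cached_tokens": usage_metadata.get("cachedContentTokenCount"),
        "reasoning_tokens": usage_metadata.get("thoughtsTokenCount"),
    }


print(build_usage({"promptTokenCount": 12, "candidatesTokenCount": 40, "totalTokenCount": 52, "thoughtsTokenCount": 20}))
```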
@@ -731,11 +807,16 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
                     citation_metadata.append(candidate["citationMetadata"])

                 if "parts" in candidate["content"]:
-                    chat_completion_message[
-                        "content"
-                    ] = VertexGeminiConfig().get_assistant_content_message(
+                    (
+                        content,
+                        reasoning_content,
+                    ) = VertexGeminiConfig().get_assistant_content_message(
                         parts=candidate["content"]["parts"]
                     )
+                    if content is not None:
+                        chat_completion_message["content"] = content
+                    if reasoning_content is not None:
+                        chat_completion_message["reasoning_content"] = reasoning_content

                 functions, tools = self._transform_parts(
                     parts=candidate["content"]["parts"],
@@ -38,7 +38,7 @@ def generate_iam_token(api_key=None, **params) -> str:
     headers = {}
     headers["Content-Type"] = "application/x-www-form-urlencoded"
     if api_key is None:
-        api_key = get_secret_str("WX_API_KEY") or get_secret_str("WATSONX_API_KEY")
+        api_key = get_secret_str("WX_API_KEY") or get_secret_str("WATSONX_API_KEY") or get_secret_str("WATSONX_APIKEY")
     if api_key is None:
         raise ValueError("API key is required")
     headers["Accept"] = "application/json"
@@ -1435,6 +1435,7 @@ def completion(  # type: ignore # noqa: PLR0915
                 custom_llm_provider=custom_llm_provider,
                 encoding=encoding,
                 stream=stream,
+                provider_config=provider_config,
             )
         except Exception as e:
             ## LOGGING - log the original exception returned
@@ -1596,6 +1597,37 @@ def completion(  # type: ignore # noqa: PLR0915
                     additional_args={"headers": headers},
                 )
             response = _response
+        elif custom_llm_provider == "fireworks_ai":
+            ## COMPLETION CALL
+            try:
+                response = base_llm_http_handler.completion(
+                    model=model,
+                    messages=messages,
+                    headers=headers,
+                    model_response=model_response,
+                    api_key=api_key,
+                    api_base=api_base,
+                    acompletion=acompletion,
+                    logging_obj=logging,
+                    optional_params=optional_params,
+                    litellm_params=litellm_params,
+                    timeout=timeout,  # type: ignore
+                    client=client,
+                    custom_llm_provider=custom_llm_provider,
+                    encoding=encoding,
+                    stream=stream,
+                    provider_config=provider_config,
+                )
+            except Exception as e:
+                ## LOGGING - log the original exception returned
+                logging.post_call(
+                    input=messages,
+                    api_key=api_key,
+                    original_response=str(e),
+                    additional_args={"headers": headers},
+                )
+                raise e
+
         elif custom_llm_provider == "groq":
             api_base = (
                 api_base  # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there
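
A hedged usage sketch for the new `fireworks_ai` branch (assumes this branch of litellm and a Fireworks AI API key configured in the environment; the model name is illustrative): the call is routed through `base_llm_http_handler.completion` with the Fireworks provider config resolved earlier.

```python
import litellm

resp = litellm.completion(
    model="fireworks_ai/accounts/fireworks/models/llama-v3p1-70b-instruct",  # illustrative
    messages=[{"role": "user", "content": "Say hello"}],
)
print(resp.choices[0].message.content)
```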
@@ -5,6 +5,7 @@
         "max_output_tokens": "max output tokens, if the provider specifies it. if not default to max_tokens",
         "input_cost_per_token": 0.0000,
         "output_cost_per_token": 0.000,
+        "output_cost_per_reasoning_token": 0.000,
         "litellm_provider": "one of https://docs.litellm.ai/docs/providers",
         "mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, rerank",
         "supports_function_calling": true,
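
A minimal standalone sketch of how the new `output_cost_per_reasoning_token` field can enter a cost calculation. This is an illustrative formula, not litellm's cost_calculator, and it assumes reasoning tokens are reported inside `completion_tokens`; adjust if your provider reports them separately.

```python
def response_cost(usage: dict, price: dict) -> float:
    reasoning = usage.get("reasoning_tokens", 0)
    completion = usage.get("completion_tokens", 0) - reasoning  # assumption, see note above
    return (
        usage.get("prompt_tokens", 0) * price["input_cost_per_token"]
        + completion * price["output_cost_per_token"]
        + reasoning * price.get("output_cost_per_reasoning_token", price["output_cost_per_token"])
    )


price = {"input_cost_per_token": 0.15e-6, "output_cost_per_token": 0.6e-6, "output_cost_per_reasoning_token": 3.5e-6}
print(response_cost({"prompt_tokens": 1000, "completion_tokens": 500, "reasoning_tokens": 200}, price))
```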
@@ -1471,6 +1472,73 @@
         "litellm_provider": "openai",
         "supported_endpoints": ["/v1/audio/speech"]
     },
+    "azure/computer-use-preview": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000012,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_endpoints": ["/v1/responses"],
+        "supported_modalities": ["text", "image"],
+        "supported_output_modalities": ["text"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_response_schema": true,
+        "supports_vision": true,
+        "supports_prompt_caching": false,
+        "supports_system_messages": true,
+        "supports_tool_choice": true,
+        "supports_native_streaming": false,
+        "supports_reasoning": true
+    },
+    "azure/gpt-4o-audio-preview-2024-12-17": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 0.0000025,
+        "input_cost_per_audio_token": 0.00004,
+        "output_cost_per_token": 0.00001,
+        "output_cost_per_audio_token": 0.00008,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_endpoints": ["/v1/chat/completions"],
+        "supported_modalities": ["text", "audio"],
+        "supported_output_modalities": ["text", "audio"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_response_schema": false,
+        "supports_vision": false,
+        "supports_prompt_caching": false,
+        "supports_system_messages": true,
+        "supports_tool_choice": true,
+        "supports_native_streaming": true,
+        "supports_reasoning": false
+    },
+    "azure/gpt-4o-mini-audio-preview-2024-12-17": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 0.0000025,
+        "input_cost_per_audio_token": 0.00004,
+        "output_cost_per_token": 0.00001,
+        "output_cost_per_audio_token": 0.00008,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_endpoints": ["/v1/chat/completions"],
+        "supported_modalities": ["text", "audio"],
+        "supported_output_modalities": ["text", "audio"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_response_schema": false,
+        "supports_vision": false,
+        "supports_prompt_caching": false,
+        "supports_system_messages": true,
+        "supports_tool_choice": true,
+        "supports_native_streaming": true,
+        "supports_reasoning": false
+    },
     "azure/gpt-4.1": {
         "max_tokens": 32768,
         "max_input_tokens": 1047576,
@@ -1529,6 +1597,170 @@
             "search_context_size_high": 50e-3
         }
     },
+    "azure/gpt-4.1-mini": {
+        "max_tokens": 32768,
+        "max_input_tokens": 1047576,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 0.4e-6,
+        "output_cost_per_token": 1.6e-6,
+        "input_cost_per_token_batches": 0.2e-6,
+        "output_cost_per_token_batches": 0.8e-6,
+        "cache_read_input_token_cost": 0.1e-6,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"],
+        "supported_modalities": ["text", "image"],
+        "supported_output_modalities": ["text"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_response_schema": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true,
+        "supports_system_messages": true,
+        "supports_tool_choice": true,
+        "supports_native_streaming": true,
+        "supports_web_search": true,
+        "search_context_cost_per_query": {
+            "search_context_size_low": 25e-3,
+            "search_context_size_medium": 27.5e-3,
+            "search_context_size_high": 30e-3
+        }
+    },
+    "azure/gpt-4.1-mini-2025-04-14": {
+        "max_tokens": 32768,
+        "max_input_tokens": 1047576,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 0.4e-6,
+        "output_cost_per_token": 1.6e-6,
+        "input_cost_per_token_batches": 0.2e-6,
+        "output_cost_per_token_batches": 0.8e-6,
+        "cache_read_input_token_cost": 0.1e-6,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"],
+        "supported_modalities": ["text", "image"],
+        "supported_output_modalities": ["text"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_response_schema": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true,
+        "supports_system_messages": true,
+        "supports_tool_choice": true,
+        "supports_native_streaming": true,
+        "supports_web_search": true,
+        "search_context_cost_per_query": {
+            "search_context_size_low": 25e-3,
+            "search_context_size_medium": 27.5e-3,
+            "search_context_size_high": 30e-3
+        }
+    },
+    "azure/gpt-4.1-nano": {
+        "max_tokens": 32768,
+        "max_input_tokens": 1047576,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 0.1e-6,
+        "output_cost_per_token": 0.4e-6,
+        "input_cost_per_token_batches": 0.05e-6,
+        "output_cost_per_token_batches": 0.2e-6,
+        "cache_read_input_token_cost": 0.025e-6,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"],
+        "supported_modalities": ["text", "image"],
+        "supported_output_modalities": ["text"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_response_schema": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true,
+        "supports_system_messages": true,
+        "supports_tool_choice": true,
+        "supports_native_streaming": true
+    },
+    "azure/gpt-4.1-nano-2025-04-14": {
+        "max_tokens": 32768,
+        "max_input_tokens": 1047576,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 0.1e-6,
+        "output_cost_per_token": 0.4e-6,
+        "input_cost_per_token_batches": 0.05e-6,
+        "output_cost_per_token_batches": 0.2e-6,
+        "cache_read_input_token_cost": 0.025e-6,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"],
+        "supported_modalities": ["text", "image"],
+        "supported_output_modalities": ["text"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_response_schema": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true,
+        "supports_system_messages": true,
+        "supports_tool_choice": true,
+        "supports_native_streaming": true
+    },
+    "azure/o3": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 1e-5,
+        "output_cost_per_token": 4e-5,
+        "cache_read_input_token_cost": 2.5e-6,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"],
+        "supported_modalities": ["text", "image"],
+        "supported_output_modalities": ["text"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": false,
+        "supports_vision": true,
+        "supports_prompt_caching": true,
+        "supports_response_schema": true,
+        "supports_reasoning": true,
+        "supports_tool_choice": true
+    },
+    "azure/o3-2025-04-16": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 1e-5,
+        "output_cost_per_token": 4e-5,
+        "cache_read_input_token_cost": 2.5e-6,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"],
+        "supported_modalities": ["text", "image"],
+        "supported_output_modalities": ["text"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": false,
+        "supports_vision": true,
+        "supports_prompt_caching": true,
+        "supports_response_schema": true,
+        "supports_reasoning": true,
+        "supports_tool_choice": true
+    },
+    "azure/o4-mini": {
+        "max_tokens": 100000,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 100000,
+        "input_cost_per_token": 1.1e-6,
+        "output_cost_per_token": 4.4e-6,
+        "cache_read_input_token_cost": 2.75e-7,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"],
+        "supported_modalities": ["text", "image"],
+        "supported_output_modalities": ["text"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": false,
+        "supports_vision": true,
+        "supports_prompt_caching": true,
+        "supports_response_schema": true,
+        "supports_reasoning": true,
+        "supports_tool_choice": true
+    },
     "azure/gpt-4o-mini-realtime-preview-2024-12-17": {
         "max_tokens": 4096,
         "max_input_tokens": 128000,
@@ -5178,9 +5410,10 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_audio_token": 0.0000001,
-        "input_cost_per_token": 0.00000015,
-        "output_cost_per_token": 0.00000060,
+        "input_cost_per_audio_token": 1e-6,
+        "input_cost_per_token": 0.15e-6,
+        "output_cost_per_token": 0.6e-6,
+        "output_cost_per_reasoning_token": 3.5e-6,
         "litellm_provider": "gemini",
         "mode": "chat",
         "rpm": 10,
@@ -5188,9 +5421,39 @@
         "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
+        "supports_reasoning": true,
         "supports_response_schema": true,
         "supports_audio_output": false,
         "supports_tool_choice": true,
+        "supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
+        "supported_modalities": ["text", "image", "audio", "video"],
+        "supported_output_modalities": ["text"],
+        "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview"
+    },
+    "gemini-2.5-flash-preview-04-17": {
+        "max_tokens": 65536,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 65536,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 1e-6,
+        "input_cost_per_token": 0.15e-6,
+        "output_cost_per_token": 0.6e-6,
+        "output_cost_per_reasoning_token": 3.5e-6,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_reasoning": true,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"],
         "supported_modalities": ["text", "image", "audio", "video"],
         "supported_output_modalities": ["text"],
         "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview"
@ -5269,6 +5532,35 @@
|
||||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||||
"supports_tool_choice": true
|
"supports_tool_choice": true
|
||||||
},
|
},
|
||||||
|
"gemini-2.5-pro-preview-03-25": {
|
||||||
|
"max_tokens": 65536,
|
||||||
|
"max_input_tokens": 1048576,
|
||||||
|
"max_output_tokens": 65536,
|
||||||
|
"max_images_per_prompt": 3000,
|
||||||
|
"max_videos_per_prompt": 10,
|
||||||
|
"max_video_length": 1,
|
||||||
|
"max_audio_length_hours": 8.4,
|
||||||
|
"max_audio_per_prompt": 1,
|
||||||
|
"max_pdf_size_mb": 30,
|
||||||
|
"input_cost_per_audio_token": 0.00000125,
|
||||||
|
"input_cost_per_token": 0.00000125,
|
||||||
|
"input_cost_per_token_above_200k_tokens": 0.0000025,
|
||||||
|
"output_cost_per_token": 0.00001,
|
||||||
|
"output_cost_per_token_above_200k_tokens": 0.000015,
|
||||||
|
"litellm_provider": "vertex_ai-language-models",
|
||||||
|
"mode": "chat",
|
||||||
|
"supports_reasoning": true,
|
||||||
|
"supports_system_messages": true,
|
||||||
|
"supports_function_calling": true,
|
||||||
|
"supports_vision": true,
|
||||||
|
"supports_response_schema": true,
|
||||||
|
"supports_audio_output": false,
|
||||||
|
"supports_tool_choice": true,
|
||||||
|
"supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"],
|
||||||
|
"supported_modalities": ["text", "image", "audio", "video"],
|
||||||
|
"supported_output_modalities": ["text"],
|
||||||
|
"source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview"
|
||||||
|
},
|
||||||
"gemini/gemini-2.0-pro-exp-02-05": {
|
"gemini/gemini-2.0-pro-exp-02-05": {
|
||||||
"max_tokens": 8192,
|
"max_tokens": 8192,
|
||||||
"max_input_tokens": 2097152,
|
"max_input_tokens": 2097152,
|
||||||
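
The pricing entries above add `output_cost_per_reasoning_token` alongside the regular output rate. As a rough illustration of how such a field would enter a cost estimate (plain-Python sketch with illustrative names, not LiteLLM's cost calculator):

```python
# Hypothetical sketch: price a response whose usage reports reasoning tokens
# separately. Names and structure here are illustrative, not LiteLLM internals.
pricing = {
    "input_cost_per_token": 0.15e-6,
    "output_cost_per_token": 0.6e-6,
    "output_cost_per_reasoning_token": 3.5e-6,
}

def estimate_cost(prompt_tokens: int, text_tokens: int, reasoning_tokens: int) -> float:
    # Reasoning tokens are billed at their own rate when one is configured,
    # otherwise they fall back to the normal output rate.
    reasoning_rate = pricing.get(
        "output_cost_per_reasoning_token", pricing["output_cost_per_token"]
    )
    return (
        prompt_tokens * pricing["input_cost_per_token"]
        + text_tokens * pricing["output_cost_per_token"]
        + reasoning_tokens * reasoning_rate
    )

print(estimate_cost(prompt_tokens=1_000, text_tokens=200, reasoning_tokens=800))
```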

litellm/openai-responses-starter-app (new submodule, 1 line)
@ -0,0 +1 @@
+ Subproject commit bf0485467c343957ba5c217db777f407b2e65453

@ -0,0 +1 @@
+ [new minified webpack chunk for the onboarding page: after sign-up it sets the token cookie and redirects to "/ui/?login=success"]
@ -1 +0,0 @@
- [old minified webpack chunk for the onboarding page: previously redirected to "/ui/?userID=<user_id>"]
@ -1 +1 @@
- [old prerendered index.html: stylesheet be22292d8ac48764.css, buildId "u3E41CAVE1NTuNPVcBvVa", page chunk app/page-1e545df8fad65452.js]
+ [new prerendered index.html: stylesheet 3da1b0cfa7d4e161.css, buildId "FPIQgzUY81b7nl8zNun4_", page chunk app/page-8f2fcc2af91a32fd.js]
@ -1,7 +1,7 @@
  2:I[19107,[],"ClientPageRoot"]
- 3:I[94226,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-59f99bfbf676f282.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","875","static/chunks/875-85b7d9e9afef48d5.js","250","static/chunks/250-7b7f46d48724f856.js","699","static/chunks/699-99a8a36b70ac90c1.js","931","static/chunks/app/page-1e545df8fad65452.js"],"default",1]
+ 3:I[25762,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-59f99bfbf676f282.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","860","static/chunks/860-c1d8f124df444312.js","250","static/chunks/250-a927a558002d8fb9.js","699","static/chunks/699-99a8a36b70ac90c1.js","931","static/chunks/app/page-8f2fcc2af91a32fd.js"],"default",1]
  4:I[4707,[],""]
  5:I[36423,[],""]
- 0:["u3E41CAVE1NTuNPVcBvVa",[... [prerendered RSC payload for the root page, referencing be22292d8ac48764.css and the old buildId] ...]]
+ 0:["FPIQgzUY81b7nl8zNun4_",[... [prerendered RSC payload for the root page, referencing 3da1b0cfa7d4e161.css and the new buildId] ...]]
  6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
  1:null

@ -1,7 +1,7 @@
  2:I[19107,[],"ClientPageRoot"]
- 3:I[52829,["42","static/chunks/42-59f99bfbf676f282.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-7b7f46d48724f856.js","699","static/chunks/699-99a8a36b70ac90c1.js","418","static/chunks/app/model_hub/page-cde2fb783e81a6c1.js"],"default",1]
+ 3:I[52829,["42","static/chunks/42-59f99bfbf676f282.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-a927a558002d8fb9.js","699","static/chunks/699-99a8a36b70ac90c1.js","418","static/chunks/app/model_hub/page-cde2fb783e81a6c1.js"],"default",1]
  4:I[4707,[],""]
  5:I[36423,[],""]
- 0:["u3E41CAVE1NTuNPVcBvVa",[... [prerendered RSC payload for the model_hub page, old buildId and CSS hash] ...]]
+ 0:["FPIQgzUY81b7nl8zNun4_",[... [prerendered RSC payload for the model_hub page, new buildId and CSS hash] ...]]
  6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
  1:null

litellm/proxy/_experimental/out/onboarding.html (1 line)
@ -1,7 +1,7 @@
  2:I[19107,[],"ClientPageRoot"]
- 3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-59f99bfbf676f282.js","899","static/chunks/899-9af4feaf6f21839c.js","250","static/chunks/250-7b7f46d48724f856.js","461","static/chunks/app/onboarding/page-82b2525e758a7201.js"],"default",1]
+ 3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-59f99bfbf676f282.js","899","static/chunks/899-9af4feaf6f21839c.js","250","static/chunks/250-a927a558002d8fb9.js","461","static/chunks/app/onboarding/page-4f4c436bd23d48a0.js"],"default",1]
  4:I[4707,[],""]
  5:I[36423,[],""]
- 0:["u3E41CAVE1NTuNPVcBvVa",[... [prerendered RSC payload for the onboarding page, old buildId and CSS hash] ...]]
+ 0:["FPIQgzUY81b7nl8zNun4_",[... [prerendered RSC payload for the onboarding page, new buildId and CSS hash] ...]]
  6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
  1:null

@ -26,8 +26,10 @@ model_list:
       model: azure/gpt-4.1
       api_key: os.environ/AZURE_API_KEY_REALTIME
       api_base: https://krris-m2f9a9i7-eastus2.openai.azure.com/
+  - model_name: "xai/*"
+    litellm_params:
+      model: xai/*
+      api_key: os.environ/XAI_API_KEY
+
 litellm_settings:
   num_retries: 0
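
The new wildcard entry routes any `xai/`-prefixed model name through the proxy with the shared `XAI_API_KEY`. A minimal client-side sketch against the proxy's OpenAI-compatible endpoint, assuming a local proxy on port 4000, a virtual key `sk-1234`, and an example model name:

```python
# Sketch: call the proxy with a wildcard-routed model via the OpenAI SDK.
# Base URL, key, and model name are assumptions for illustration.
from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

response = client.chat.completions.create(
    model="xai/grok-3-mini",  # example name matched by the xai/* wildcard
    messages=[{"role": "user", "content": "Say hello in one word."}],
)
print(response.choices[0].message.content)
```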
@ -287,6 +287,7 @@ class LiteLLMRoutes(enum.Enum):
         "/v1/models",
         # token counter
         "/utils/token_counter",
+        "/utils/transform_request",
         # rerank
         "/rerank",
         "/v1/rerank",
@ -462,6 +463,7 @@ class LiteLLMRoutes(enum.Enum):
         "/team/member_delete",
         "/team/permissions_list",
         "/team/permissions_update",
+        "/team/daily/activity",
         "/model/new",
         "/model/update",
         "/model/delete",
@ -650,9 +652,9 @@ class GenerateRequestBase(LiteLLMPydanticObjectBase):
     allowed_cache_controls: Optional[list] = []
     config: Optional[dict] = {}
     permissions: Optional[dict] = {}
-    model_max_budget: Optional[dict] = (
-        {}
-    )  # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
+    model_max_budget: Optional[
+        dict
+    ] = {}  # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}

     model_config = ConfigDict(protected_namespaces=())
     model_rpm_limit: Optional[dict] = None
@ -911,12 +913,12 @@ class NewCustomerRequest(BudgetNewRequest):
     alias: Optional[str] = None  # human-friendly alias
     blocked: bool = False  # allow/disallow requests for this end-user
     budget_id: Optional[str] = None  # give either a budget_id or max_budget
-    allowed_model_region: Optional[AllowedModelRegion] = (
-        None  # require all user requests to use models in this specific region
-    )
-    default_model: Optional[str] = (
-        None  # if no equivalent model in allowed region - default all requests to this model
-    )
+    allowed_model_region: Optional[
+        AllowedModelRegion
+    ] = None  # require all user requests to use models in this specific region
+    default_model: Optional[
+        str
+    ] = None  # if no equivalent model in allowed region - default all requests to this model

     @model_validator(mode="before")
     @classmethod
@ -938,12 +940,12 @@ class UpdateCustomerRequest(LiteLLMPydanticObjectBase):
     blocked: bool = False  # allow/disallow requests for this end-user
     max_budget: Optional[float] = None
     budget_id: Optional[str] = None  # give either a budget_id or max_budget
-    allowed_model_region: Optional[AllowedModelRegion] = (
-        None  # require all user requests to use models in this specific region
-    )
-    default_model: Optional[str] = (
-        None  # if no equivalent model in allowed region - default all requests to this model
-    )
+    allowed_model_region: Optional[
+        AllowedModelRegion
+    ] = None  # require all user requests to use models in this specific region
+    default_model: Optional[
+        str
+    ] = None  # if no equivalent model in allowed region - default all requests to this model


 class DeleteCustomerRequest(LiteLLMPydanticObjectBase):
@ -1079,9 +1081,9 @@ class BlockKeyRequest(LiteLLMPydanticObjectBase):

 class AddTeamCallback(LiteLLMPydanticObjectBase):
     callback_name: str
-    callback_type: Optional[Literal["success", "failure", "success_and_failure"]] = (
-        "success_and_failure"
-    )
+    callback_type: Optional[
+        Literal["success", "failure", "success_and_failure"]
+    ] = "success_and_failure"
     callback_vars: Dict[str, str]

     @model_validator(mode="before")
@ -1339,9 +1341,9 @@ class ConfigList(LiteLLMPydanticObjectBase):
     stored_in_db: Optional[bool]
     field_default_value: Any
     premium_field: bool = False
-    nested_fields: Optional[List[FieldDetail]] = (
-        None  # For nested dictionary or Pydantic fields
-    )
+    nested_fields: Optional[
+        List[FieldDetail]
+    ] = None  # For nested dictionary or Pydantic fields


 class ConfigGeneralSettings(LiteLLMPydanticObjectBase):
@ -1609,9 +1611,9 @@ class LiteLLM_OrganizationMembershipTable(LiteLLMPydanticObjectBase):
     budget_id: Optional[str] = None
     created_at: datetime
     updated_at: datetime
-    user: Optional[Any] = (
-        None  # You might want to replace 'Any' with a more specific type if available
-    )
+    user: Optional[
+        Any
+    ] = None  # You might want to replace 'Any' with a more specific type if available
     litellm_budget_table: Optional[LiteLLM_BudgetTable] = None

     model_config = ConfigDict(protected_namespaces=())
@ -2359,9 +2361,9 @@ class TeamModelDeleteRequest(BaseModel):

 # Organization Member Requests
 class OrganizationMemberAddRequest(OrgMemberAddRequest):
     organization_id: str
-    max_budget_in_organization: Optional[float] = (
-        None  # Users max budget within the organization
-    )
+    max_budget_in_organization: Optional[
+        float
+    ] = None  # Users max budget within the organization


 class OrganizationMemberDeleteRequest(MemberDeleteRequest):
@ -2550,9 +2552,9 @@ class ProviderBudgetResponse(LiteLLMPydanticObjectBase):
     Maps provider names to their budget configs.
     """

-    providers: Dict[str, ProviderBudgetResponseObject] = (
-        {}
-    )  # Dictionary mapping provider names to their budget configurations
+    providers: Dict[
+        str, ProviderBudgetResponseObject
+    ] = {}  # Dictionary mapping provider names to their budget configurations


 class ProxyStateVariables(TypedDict):
@ -2680,9 +2682,9 @@ class LiteLLM_JWTAuth(LiteLLMPydanticObjectBase):
     enforce_rbac: bool = False
     roles_jwt_field: Optional[str] = None  # v2 on role mappings
     role_mappings: Optional[List[RoleMapping]] = None
-    object_id_jwt_field: Optional[str] = (
-        None  # can be either user / team, inferred from the role mapping
-    )
+    object_id_jwt_field: Optional[
+        str
+    ] = None  # can be either user / team, inferred from the role mapping
     scope_mappings: Optional[List[ScopeMapping]] = None
     enforce_scope_based_access: bool = False
     enforce_team_based_model_access: bool = False
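
The `_types.py` hunks above are formatting-only: each field keeps the same default, and the `Optional[...]` subscript is split across lines so the explanatory comment stays attached to the default value. A minimal before/after sketch showing the two layouts are equivalent (standalone models, not the real ones):

```python
# Both declarations behave identically at runtime; only the layout differs.
from typing import Optional
from pydantic import BaseModel, ConfigDict

class Before(BaseModel):
    model_config = ConfigDict(protected_namespaces=())
    model_max_budget: Optional[dict] = (
        {}
    )  # {"gpt-4": 5.0}, defaults to {}

class After(BaseModel):
    model_config = ConfigDict(protected_namespaces=())
    model_max_budget: Optional[
        dict
    ] = {}  # {"gpt-4": 5.0}, defaults to {}

assert Before().model_max_budget == After().model_max_budget == {}
```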
@ -88,7 +88,7 @@ async def common_checks(
     9. Check if request body is safe
     10. [OPTIONAL] Organization checks - is user_object.organization_id is set, run these checks
     """
-    _model = request_body.get("model", None)
+    _model: Optional[str] = cast(Optional[str], request_body.get("model", None))

     # 1. If team is blocked
     if team_object is not None and team_object.blocked is True:
@ -112,7 +112,7 @@ async def common_checks(
         )

     ## 2.1 If user can call model (if personal key)
-    if team_object is None and user_object is not None:
+    if _model and team_object is None and user_object is not None:
         await can_user_call_model(
             model=_model,
             llm_router=llm_router,
@ -644,6 +644,7 @@ async def get_user_object(
     proxy_logging_obj: Optional[ProxyLogging] = None,
     sso_user_id: Optional[str] = None,
     user_email: Optional[str] = None,
+    check_db_only: Optional[bool] = None,
 ) -> Optional[LiteLLM_UserTable]:
     """
     - Check if user id in proxy User Table
@ -655,12 +656,13 @@
         return None

     # check if in cache
-    cached_user_obj = await user_api_key_cache.async_get_cache(key=user_id)
-    if cached_user_obj is not None:
-        if isinstance(cached_user_obj, dict):
-            return LiteLLM_UserTable(**cached_user_obj)
-        elif isinstance(cached_user_obj, LiteLLM_UserTable):
-            return cached_user_obj
+    if not check_db_only:
+        cached_user_obj = await user_api_key_cache.async_get_cache(key=user_id)
+        if cached_user_obj is not None:
+            if isinstance(cached_user_obj, dict):
+                return LiteLLM_UserTable(**cached_user_obj)
+            elif isinstance(cached_user_obj, LiteLLM_UserTable):
+                return cached_user_obj
     # else, check db
     if prisma_client is None:
         raise Exception("No db connected")
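
`check_db_only` gives callers a way to skip the user cache and force a database read while leaving the cached fast path as the default. A self-contained sketch of that lookup order, with in-memory dicts standing in for the cache and the database:

```python
# Sketch of the cache-bypass pattern introduced by check_db_only.
import asyncio
from typing import Optional

_CACHE: dict[str, dict] = {"u1": {"user_id": "u1", "source": "cache"}}
_DB: dict[str, dict] = {"u1": {"user_id": "u1", "source": "db"}}

async def get_user(user_id: str, check_db_only: Optional[bool] = None) -> Optional[dict]:
    # Fast path: serve from cache unless the caller forces a DB read.
    if not check_db_only:
        cached = _CACHE.get(user_id)
        if cached is not None:
            return cached
    # Slow path: always consult the "database" when check_db_only is truthy.
    return _DB.get(user_id)

print(asyncio.run(get_user("u1")))                      # {'user_id': 'u1', 'source': 'cache'}
print(asyncio.run(get_user("u1", check_db_only=True)))  # {'user_id': 'u1', 'source': 'db'}
```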
@ -199,9 +199,13 @@ class _ProxyDBLogger(CustomLogger):
         except Exception as e:
             error_msg = f"Error in tracking cost callback - {str(e)}\n Traceback:{traceback.format_exc()}"
             model = kwargs.get("model", "")
-            metadata = kwargs.get("litellm_params", {}).get("metadata", {})
+            metadata = get_litellm_metadata_from_kwargs(kwargs=kwargs)
+            litellm_metadata = kwargs.get("litellm_params", {}).get(
+                "litellm_metadata", {}
+            )
+            old_metadata = kwargs.get("litellm_params", {}).get("metadata", {})
             call_type = kwargs.get("call_type", "")
-            error_msg += f"\n Args to _PROXY_track_cost_callback\n model: {model}\n metadata: {metadata}\n call_type: {call_type}\n"
+            error_msg += f"\n Args to _PROXY_track_cost_callback\n model: {model}\n chosen_metadata: {metadata}\n litellm_metadata: {litellm_metadata}\n old_metadata: {old_metadata}\n call_type: {call_type}\n"
             asyncio.create_task(
                 proxy_logging_obj.failed_tracking_alert(
                     error_message=error_msg,
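
The failure alert now records both metadata shapes, which makes it clear whether the cost callback read the newer `litellm_metadata` or the legacy `metadata` field. A small sketch of that selection order (this is an assumption about what `get_litellm_metadata_from_kwargs` returns, not a copy of it):

```python
# Sketch: pick whichever metadata dict is populated, preferring the newer key.
def choose_metadata(kwargs: dict) -> dict:
    litellm_params = kwargs.get("litellm_params", {}) or {}
    return (
        litellm_params.get("litellm_metadata")
        or litellm_params.get("metadata")
        or {}
    )

kwargs = {"litellm_params": {"metadata": {"user_api_key": "hashed-key"}}}
print(choose_metadata(kwargs))  # falls back to the legacy "metadata" field
```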
@ -433,14 +433,13 @@ class LiteLLMProxyRequestSetup:
     ) -> Optional[List[str]]:
         tags = None

-        if llm_router and llm_router.enable_tag_filtering is True:
-            # Check request headers for tags
-            if "x-litellm-tags" in headers:
-                if isinstance(headers["x-litellm-tags"], str):
-                    _tags = headers["x-litellm-tags"].split(",")
-                    tags = [tag.strip() for tag in _tags]
-                elif isinstance(headers["x-litellm-tags"], list):
-                    tags = headers["x-litellm-tags"]
+        # Check request headers for tags
+        if "x-litellm-tags" in headers:
+            if isinstance(headers["x-litellm-tags"], str):
+                _tags = headers["x-litellm-tags"].split(",")
+                tags = [tag.strip() for tag in _tags]
+            elif isinstance(headers["x-litellm-tags"], list):
+                tags = headers["x-litellm-tags"]
+
         # Check request body for tags
         if "tags" in data and isinstance(data["tags"], list):
             tags = data["tags"]
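
Tag extraction no longer checks `llm_router.enable_tag_filtering`; headers are always inspected and body tags still take precedence. A standalone condensation of the new behavior for quick local testing (a plain dict stands in for the request headers):

```python
from typing import List, Optional

def get_tags(headers: dict, data: dict) -> Optional[List[str]]:
    tags: Optional[List[str]] = None

    # Headers are checked unconditionally now (no enable_tag_filtering gate).
    raw = headers.get("x-litellm-tags")
    if isinstance(raw, str):
        tags = [tag.strip() for tag in raw.split(",")]
    elif isinstance(raw, list):
        tags = raw

    # Tags in the request body still take precedence over header tags.
    if isinstance(data.get("tags"), list):
        tags = data["tags"]
    return tags

print(get_tags({"x-litellm-tags": "teamA, prod"}, {}))                # ['teamA', 'prod']
print(get_tags({"x-litellm-tags": "teamA"}, {"tags": ["override"]}))  # ['override']
```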
@ -1,5 +1,5 @@
 from datetime import datetime
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Set, Union

 from fastapi import HTTPException, status

@ -39,6 +39,7 @@ def update_breakdown_metrics(
     provider_metadata: Dict[str, Dict[str, Any]],
     api_key_metadata: Dict[str, Dict[str, Any]],
     entity_id_field: Optional[str] = None,
+    entity_metadata_field: Optional[Dict[str, dict]] = None,
 ) -> BreakdownMetrics:
     """Updates breakdown metrics for a single record using the existing update_metrics function"""

@ -74,7 +75,8 @@
             metadata=KeyMetadata(
                 key_alias=api_key_metadata.get(record.api_key, {}).get(
                     "key_alias", None
-                )
+                ),
+                team_id=api_key_metadata.get(record.api_key, {}).get("team_id", None),
             ),  # Add any api_key-specific metadata here
         )
     breakdown.api_keys[record.api_key].metrics = update_metrics(
@ -87,7 +89,10 @@
     if entity_value:
         if entity_value not in breakdown.entities:
             breakdown.entities[entity_value] = MetricWithMetadata(
-                metrics=SpendMetrics(), metadata={}
+                metrics=SpendMetrics(),
+                metadata=entity_metadata_field.get(entity_value, {})
+                if entity_metadata_field
+                else {},
             )
         breakdown.entities[entity_value].metrics = update_metrics(
             breakdown.entities[entity_value].metrics, record
@ -96,17 +101,32 @@
     return breakdown


+async def get_api_key_metadata(
+    prisma_client: PrismaClient,
+    api_keys: Set[str],
+) -> Dict[str, Dict[str, Any]]:
+    """Update api key metadata for a single record."""
+    key_records = await prisma_client.db.litellm_verificationtoken.find_many(
+        where={"token": {"in": list(api_keys)}}
+    )
+    return {
+        k.token: {"key_alias": k.key_alias, "team_id": k.team_id} for k in key_records
+    }
+
+
 async def get_daily_activity(
     prisma_client: Optional[PrismaClient],
     table_name: str,
     entity_id_field: str,
     entity_id: Optional[Union[str, List[str]]],
+    entity_metadata_field: Optional[Dict[str, dict]],
     start_date: Optional[str],
     end_date: Optional[str],
     model: Optional[str],
     api_key: Optional[str],
     page: int,
     page_size: int,
+    exclude_entity_ids: Optional[List[str]] = None,
 ) -> SpendAnalyticsPaginatedResponse:
     """Common function to get daily activity for any entity type."""
     if prisma_client is None:
@ -134,11 +154,15 @@
         where_conditions["model"] = model
     if api_key:
         where_conditions["api_key"] = api_key
-    if entity_id:
+    if entity_id is not None:
         if isinstance(entity_id, list):
             where_conditions[entity_id_field] = {"in": entity_id}
         else:
             where_conditions[entity_id_field] = entity_id
+    if exclude_entity_ids:
+        where_conditions.setdefault(entity_id_field, {})["not"] = {
+            "in": exclude_entity_ids
+        }

     # Get total count for pagination
     total_count = await getattr(prisma_client.db, table_name).count(
@ -166,12 +190,7 @@
     model_metadata: Dict[str, Dict[str, Any]] = {}
     provider_metadata: Dict[str, Dict[str, Any]] = {}
     if api_keys:
-        key_records = await prisma_client.db.litellm_verificationtoken.find_many(
-            where={"token": {"in": list(api_keys)}}
-        )
-        api_key_metadata.update(
-            {k.token: {"key_alias": k.key_alias} for k in key_records}
-        )
+        api_key_metadata = await get_api_key_metadata(prisma_client, api_keys)

     # Process results
     results = []
@ -198,6 +217,7 @@
             provider_metadata,
             api_key_metadata,
             entity_id_field=entity_id_field,
+            entity_metadata_field=entity_metadata_field,
         )

         # Update total metrics
|
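Illustrative note (not part of the diff): a minimal sketch of how the new `exclude_entity_ids` filter composes with an existing `entity_id` filter into a Prisma-style `where` clause. Plain dicts stand in for the Prisma client's filter objects; the function name is chosen here for illustration.

```python
from typing import Any, Dict, List, Optional, Union


def build_entity_filter(
    entity_id_field: str,
    entity_id: Optional[Union[str, List[str]]] = None,
    exclude_entity_ids: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Mirror the where-clause logic added to get_daily_activity (sketch only)."""
    where_conditions: Dict[str, Any] = {}
    if entity_id is not None:
        if isinstance(entity_id, list):
            where_conditions[entity_id_field] = {"in": entity_id}
        else:
            where_conditions[entity_id_field] = entity_id
    if exclude_entity_ids:
        # setdefault adds the exclusion alongside an existing {"in": [...]} filter
        where_conditions.setdefault(entity_id_field, {})["not"] = {
            "in": exclude_entity_ids
        }
    return where_conditions


if __name__ == "__main__":
    print(build_entity_filter("team_id", ["t1", "t2"], ["t3"]))
    # {'team_id': {'in': ['t1', 't2'], 'not': {'in': ['t3']}}}
```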
@@ -4,11 +4,19 @@ from litellm.proxy._types import (
     GenerateKeyRequest,
     LiteLLM_ManagementEndpoint_MetadataFields_Premium,
     LiteLLM_TeamTable,
+    LitellmUserRoles,
     UserAPIKeyAuth,
 )
 from litellm.proxy.utils import _premium_user_check


+def _user_has_admin_view(user_api_key_dict: UserAPIKeyAuth) -> bool:
+    return (
+        user_api_key_dict.user_role == LitellmUserRoles.PROXY_ADMIN
+        or user_api_key_dict.user_role == LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY
+    )
+
+
 def _is_user_team_admin(
     user_api_key_dict: UserAPIKeyAuth, team_obj: LiteLLM_TeamTable
 ) -> bool:
@@ -25,6 +25,8 @@ from litellm._logging import verbose_proxy_logger
 from litellm.litellm_core_utils.duration_parser import duration_in_seconds
 from litellm.proxy._types import *
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm.proxy.management_endpoints.common_daily_activity import get_daily_activity
+from litellm.proxy.management_endpoints.common_utils import _user_has_admin_view
 from litellm.proxy.management_endpoints.key_management_endpoints import (
     generate_key_helper_fn,
     prepare_metadata_fields,
@@ -34,8 +36,6 @@ from litellm.proxy.management_helpers.utils import management_endpoint_wrapper
 from litellm.proxy.utils import handle_exception_on_proxy
 from litellm.types.proxy.management_endpoints.common_daily_activity import (
     BreakdownMetrics,
-    DailySpendData,
-    DailySpendMetadata,
     KeyMetadata,
     KeyMetricWithMetadata,
     LiteLLM_DailyUserSpend,
@@ -1382,136 +1382,22 @@ async def get_user_daily_activity(
     )

     try:
-        # Build filter conditions
-        where_conditions: Dict[str, Any] = {
-            "date": {
-                "gte": start_date,
-                "lte": end_date,
-            }
-        }
-
-        if model:
-            where_conditions["model"] = model
-        if api_key:
-            where_conditions["api_key"] = api_key
-        if (
-            user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
-            and user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY
-        ):
-            where_conditions[
-                "user_id"
-            ] = user_api_key_dict.user_id  # only allow access to own data
-
-        # Get total count for pagination
-        total_count = await prisma_client.db.litellm_dailyuserspend.count(
-            where=where_conditions
-        )
-
-        # Fetch paginated results
-        daily_spend_data = await prisma_client.db.litellm_dailyuserspend.find_many(
-            where=where_conditions,
-            order=[
-                {"date": "desc"},
-            ],
-            skip=(page - 1) * page_size,
-            take=page_size,
-        )
-
-        daily_spend_data_pydantic_list = [
-            LiteLLM_DailyUserSpend(**record.model_dump()) for record in daily_spend_data
-        ]
-
-        # Get all unique API keys from the spend data
-        api_keys = set()
-        for record in daily_spend_data_pydantic_list:
-            if record.api_key:
-                api_keys.add(record.api_key)
-
-        # Fetch key aliases in bulk
-
-        api_key_metadata: Dict[str, Dict[str, Any]] = {}
-        model_metadata: Dict[str, Dict[str, Any]] = {}
-        provider_metadata: Dict[str, Dict[str, Any]] = {}
-        if api_keys:
-            key_records = await prisma_client.db.litellm_verificationtoken.find_many(
-                where={"token": {"in": list(api_keys)}}
-            )
-            api_key_metadata.update(
-                {k.token: {"key_alias": k.key_alias} for k in key_records}
-            )
-        # Process results
-        results = []
-        total_metrics = SpendMetrics()
-
-        # Group data by date and other dimensions
-
-        grouped_data: Dict[str, Dict[str, Any]] = {}
-        for record in daily_spend_data_pydantic_list:
-            date_str = record.date
-            if date_str not in grouped_data:
-                grouped_data[date_str] = {
-                    "metrics": SpendMetrics(),
-                    "breakdown": BreakdownMetrics(),
-                }
-
-            # Update metrics
-            grouped_data[date_str]["metrics"] = update_metrics(
-                grouped_data[date_str]["metrics"], record
-            )
-            # Update breakdowns
-            grouped_data[date_str]["breakdown"] = update_breakdown_metrics(
-                grouped_data[date_str]["breakdown"],
-                record,
-                model_metadata,
-                provider_metadata,
-                api_key_metadata,
-            )
-
-            # Update total metrics
-            total_metrics.spend += record.spend
-            total_metrics.prompt_tokens += record.prompt_tokens
-            total_metrics.completion_tokens += record.completion_tokens
-            total_metrics.total_tokens += (
-                record.prompt_tokens + record.completion_tokens
-            )
-            total_metrics.cache_read_input_tokens += record.cache_read_input_tokens
-            total_metrics.cache_creation_input_tokens += (
-                record.cache_creation_input_tokens
-            )
-            total_metrics.api_requests += record.api_requests
-            total_metrics.successful_requests += record.successful_requests
-            total_metrics.failed_requests += record.failed_requests
-
-        # Convert grouped data to response format
-        for date_str, data in grouped_data.items():
-            results.append(
-                DailySpendData(
-                    date=datetime.strptime(date_str, "%Y-%m-%d").date(),
-                    metrics=data["metrics"],
-                    breakdown=data["breakdown"],
-                )
-            )
-
-        # Sort results by date
-        results.sort(key=lambda x: x.date, reverse=True)
-
-        return SpendAnalyticsPaginatedResponse(
-            results=results,
-            metadata=DailySpendMetadata(
-                total_spend=total_metrics.spend,
-                total_prompt_tokens=total_metrics.prompt_tokens,
-                total_completion_tokens=total_metrics.completion_tokens,
-                total_tokens=total_metrics.total_tokens,
-                total_api_requests=total_metrics.api_requests,
-                total_successful_requests=total_metrics.successful_requests,
-                total_failed_requests=total_metrics.failed_requests,
-                total_cache_read_input_tokens=total_metrics.cache_read_input_tokens,
-                total_cache_creation_input_tokens=total_metrics.cache_creation_input_tokens,
-                page=page,
-                total_pages=-(-total_count // page_size),  # Ceiling division
-                has_more=(page * page_size) < total_count,
-            ),
+        entity_id: Optional[str] = None
+        if not _user_has_admin_view(user_api_key_dict):
+            entity_id = user_api_key_dict.user_id
+
+        return await get_daily_activity(
+            prisma_client=prisma_client,
+            table_name="litellm_dailyuserspend",
+            entity_id_field="user_id",
+            entity_id=entity_id,
+            entity_metadata_field=None,
+            start_date=start_date,
+            end_date=end_date,
+            model=model,
+            api_key=api_key,
+            page=page,
+            page_size=page_size,
         )

     except Exception as e:
@@ -577,12 +577,16 @@ async def generate_key_fn(  # noqa: PLR0915
         request_type="key", **data_json, table_name="key"
     )

-    response["soft_budget"] = (
-        data.soft_budget
-    )  # include the user-input soft budget in the response
+    response[
+        "soft_budget"
+    ] = data.soft_budget  # include the user-input soft budget in the response

     response = GenerateKeyResponse(**response)

+    response.token = (
+        response.token_id
+    )  # remap token to use the hash, and leave the key in the `key` field [TODO]: clean up generate_key_helper_fn to do this
+
     asyncio.create_task(
         KeyManagementEventHooks.async_key_generated_hook(
             data=data,
@@ -1470,10 +1474,10 @@ async def delete_verification_tokens(
     try:
         if prisma_client:
             tokens = [_hash_token_if_needed(token=key) for key in tokens]
-            _keys_being_deleted: List[LiteLLM_VerificationToken] = (
-                await prisma_client.db.litellm_verificationtoken.find_many(
-                    where={"token": {"in": tokens}}
-                )
+            _keys_being_deleted: List[
+                LiteLLM_VerificationToken
+            ] = await prisma_client.db.litellm_verificationtoken.find_many(
+                where={"token": {"in": tokens}}
             )

             # Assuming 'db' is your Prisma Client instance
@@ -1575,9 +1579,9 @@ async def _rotate_master_key(
     from litellm.proxy.proxy_server import proxy_config

     try:
-        models: Optional[List] = (
-            await prisma_client.db.litellm_proxymodeltable.find_many()
-        )
+        models: Optional[
+            List
+        ] = await prisma_client.db.litellm_proxymodeltable.find_many()
     except Exception:
         models = None
     # 2. process model table
@@ -1864,11 +1868,11 @@ async def validate_key_list_check(
             param="user_id",
             code=status.HTTP_403_FORBIDDEN,
         )
-    complete_user_info_db_obj: Optional[BaseModel] = (
-        await prisma_client.db.litellm_usertable.find_unique(
-            where={"user_id": user_api_key_dict.user_id},
-            include={"organization_memberships": True},
-        )
+    complete_user_info_db_obj: Optional[
+        BaseModel
+    ] = await prisma_client.db.litellm_usertable.find_unique(
+        where={"user_id": user_api_key_dict.user_id},
+        include={"organization_memberships": True},
     )

     if complete_user_info_db_obj is None:
@@ -1929,10 +1933,10 @@ async def get_admin_team_ids(
     if complete_user_info is None:
         return []
     # Get all teams that user is an admin of
-    teams: Optional[List[BaseModel]] = (
-        await prisma_client.db.litellm_teamtable.find_many(
-            where={"team_id": {"in": complete_user_info.teams}}
-        )
+    teams: Optional[
+        List[BaseModel]
+    ] = await prisma_client.db.litellm_teamtable.find_many(
+        where={"team_id": {"in": complete_user_info.teams}}
     )
     if teams is None:
         return []
@@ -12,7 +12,7 @@ All /tag management endpoints

 import datetime
 import json
-from typing import Dict, Optional
+from typing import Dict, List, Optional

 from fastapi import APIRouter, Depends, HTTPException

@@ -25,6 +25,7 @@ from litellm.proxy.management_endpoints.common_daily_activity import (
     get_daily_activity,
 )
 from litellm.types.tag_management import (
+    LiteLLM_DailyTagSpendTable,
     TagConfig,
     TagDeleteRequest,
     TagInfoRequest,
@@ -301,6 +302,7 @@ async def info_tag(
     "/tag/list",
     tags=["tag management"],
     dependencies=[Depends(user_api_key_auth)],
+    response_model=List[TagConfig],
 )
 async def list_tags(
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
@@ -314,9 +316,33 @@ async def list_tags(
         raise HTTPException(status_code=500, detail="Database not connected")

     try:
+        ## QUERY STORED TAGS ##
         tags_config = await _get_tags_config(prisma_client)
         list_of_tags = list(tags_config.values())
-        return list_of_tags
+
+        ## QUERY DYNAMIC TAGS ##
+        dynamic_tags = await prisma_client.db.litellm_dailytagspend.find_many(
+            distinct=["tag"],
+        )
+
+        dynamic_tags_list = [
+            LiteLLM_DailyTagSpendTable(**dynamic_tag.model_dump())
+            for dynamic_tag in dynamic_tags
+        ]
+
+        dynamic_tag_config = [
+            TagConfig(
+                name=tag.tag,
+                description="This is just a spend tag that was passed dynamically in a request. It does not control any LLM models.",
+                models=None,
+                created_at=tag.created_at.isoformat(),
+                updated_at=tag.updated_at.isoformat(),
+            )
+            for tag in dynamic_tags_list
+            if tag.tag not in tags_config
+        ]
+
+        return list_of_tags + dynamic_tag_config
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

@@ -400,6 +426,7 @@ async def get_tag_daily_activity(
         table_name="litellm_dailytagspend",
         entity_id_field="tag",
         entity_id=tag_list,
+        entity_metadata_field=None,
         start_date=start_date,
         end_date=end_date,
         model=model,
@@ -56,11 +56,13 @@ from litellm.proxy._types import (
 from litellm.proxy.auth.auth_checks import (
     allowed_route_check_inside_route,
     get_team_object,
+    get_user_object,
 )
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
 from litellm.proxy.management_endpoints.common_utils import (
     _is_user_team_admin,
     _set_object_metadata_field,
+    _user_has_admin_view,
 )
 from litellm.proxy.management_endpoints.tag_management_endpoints import (
     get_daily_activity,
@@ -2091,7 +2093,6 @@ async def update_team_member_permissions(
     "/team/daily/activity",
     response_model=SpendAnalyticsPaginatedResponse,
     tags=["team management"],
-    dependencies=[Depends(user_api_key_auth)],
 )
 async def get_team_daily_activity(
     team_ids: Optional[str] = None,
@@ -2101,6 +2102,8 @@ async def get_team_daily_activity(
     api_key: Optional[str] = None,
     page: int = 1,
     page_size: int = 10,
+    exclude_team_ids: Optional[str] = None,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
     """
     Get daily activity for specific teams or all teams.
@@ -2113,20 +2116,82 @@ async def get_team_daily_activity(
         api_key (Optional[str]): Filter by API key.
         page (int): Page number for pagination.
         page_size (int): Number of items per page.
+        exclude_team_ids (Optional[str]): Comma-separated list of team IDs to exclude.
     Returns:
         SpendAnalyticsPaginatedResponse: Paginated response containing daily activity data.
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm.proxy.proxy_server import (
+        prisma_client,
+        proxy_logging_obj,
+        user_api_key_cache,
+    )
+
+    if prisma_client is None:
+        raise HTTPException(
+            status_code=500,
+            detail={"error": CommonProxyErrors.db_not_connected_error.value},
+        )
+
     # Convert comma-separated tags string to list if provided
     team_ids_list = team_ids.split(",") if team_ids else None
+    exclude_team_ids_list: Optional[List[str]] = None
+
+    if exclude_team_ids:
+        exclude_team_ids_list = (
+            exclude_team_ids.split(",") if exclude_team_ids else None
+        )
+
+    if not _user_has_admin_view(user_api_key_dict):
+        user_info = await get_user_object(
+            user_id=user_api_key_dict.user_id,
+            prisma_client=prisma_client,
+            user_id_upsert=False,
+            user_api_key_cache=user_api_key_cache,
+            parent_otel_span=user_api_key_dict.parent_otel_span,
+            proxy_logging_obj=proxy_logging_obj,
+            check_db_only=True,
+        )
+        if user_info is None:
+            raise HTTPException(
+                status_code=404,
+                detail={
+                    "error": "User= {} not found".format(user_api_key_dict.user_id)
+                },
+            )
+
+        if team_ids_list is None:
+            team_ids_list = user_info.teams
+        else:
+            # check if all team_ids are in user_info.teams
+            for team_id in team_ids_list:
+                if team_id not in user_info.teams:
+                    raise HTTPException(
+                        status_code=404,
+                        detail={
+                            "error": "User does not belong to Team= {}. Call `/user/info` to see user's teams".format(
+                                team_id
+                            )
+                        },
+                    )
+
+    ## Fetch team aliases
+    where_condition = {}
+    if team_ids_list:
+        where_condition["team_id"] = {"in": list(team_ids_list)}
+    team_aliases = await prisma_client.db.litellm_teamtable.find_many(
+        where=where_condition
+    )
+    team_alias_metadata = {
+        t.team_id: {"team_alias": t.team_alias} for t in team_aliases
+    }
+
     return await get_daily_activity(
         prisma_client=prisma_client,
         table_name="litellm_dailyteamspend",
         entity_id_field="team_id",
         entity_id=team_ids_list,
+        entity_metadata_field=team_alias_metadata,
+        exclude_entity_ids=exclude_team_ids_list,
         start_date=start_date,
         end_date=end_date,
         model=model,
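Illustrative note (not part of the diff): a hypothetical client-side call against the extended `/team/daily/activity` endpoint above. The base URL, API key, team IDs, and dates are placeholders, not values from this PR; the query parameter names come from the endpoint signature in the hunk.

```python
# Sketch only: query team daily activity while excluding some teams.
import requests

resp = requests.get(
    "http://localhost:4000/team/daily/activity",  # placeholder proxy URL
    headers={"Authorization": "Bearer sk-1234"},  # placeholder virtual key
    params={
        "start_date": "2025-04-01",
        "end_date": "2025-04-07",
        "exclude_team_ids": "team-internal,team-test",  # comma-separated, per the new param
        "page": 1,
        "page_size": 10,
    },
    timeout=30,
)
resp.raise_for_status()
print(resp.json()["metadata"])  # aggregated spend/token totals for the page
```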
@@ -553,7 +553,7 @@ async def auth_callback(request: Request):  # noqa: PLR0915
         algorithm="HS256",
     )
     if user_id is not None and isinstance(user_id, str):
-        litellm_dashboard_ui += "?userID=" + user_id
+        litellm_dashboard_ui += "?login=success"
     redirect_response = RedirectResponse(url=litellm_dashboard_ui, status_code=303)
     redirect_response.set_cookie(key="token", value=jwt_token, secure=True)
     return redirect_response
@@ -592,9 +592,9 @@ async def insert_sso_user(
     if user_defined_values.get("max_budget") is None:
         user_defined_values["max_budget"] = litellm.max_internal_user_budget
     if user_defined_values.get("budget_duration") is None:
-        user_defined_values["budget_duration"] = (
-            litellm.internal_user_budget_duration
-        )
+        user_defined_values[
+            "budget_duration"
+        ] = litellm.internal_user_budget_duration

     if user_defined_values["user_role"] is None:
         user_defined_values["user_role"] = LitellmUserRoles.INTERNAL_USER_VIEW_ONLY
@@ -787,9 +787,9 @@ class SSOAuthenticationHandler:
             if state:
                 redirect_params["state"] = state
             elif "okta" in generic_authorization_endpoint:
-                redirect_params["state"] = (
-                    uuid.uuid4().hex
-                )  # set state param for okta - required
+                redirect_params[
+                    "state"
+                ] = uuid.uuid4().hex  # set state param for okta - required
             return await generic_sso.get_login_redirect(**redirect_params)  # type: ignore
         raise ValueError(
             "Unknown SSO provider. Please setup SSO with client IDs https://docs.litellm.ai/docs/proxy/admin_ui_sso"
@@ -1023,7 +1023,7 @@ class MicrosoftSSOHandler:
         original_msft_result = (
             await microsoft_sso.verify_and_process(
                 request=request,
-                convert_response=False,
+                convert_response=False,  # type: ignore
             )
             or {}
         )
@@ -1034,9 +1034,9 @@ class MicrosoftSSOHandler:

         # if user is trying to get the raw sso response for debugging, return the raw sso response
         if return_raw_sso_response:
-            original_msft_result[MicrosoftSSOHandler.GRAPH_API_RESPONSE_KEY] = (
-                user_team_ids
-            )
+            original_msft_result[
+                MicrosoftSSOHandler.GRAPH_API_RESPONSE_KEY
+            ] = user_team_ids
             return original_msft_result or {}

         result = MicrosoftSSOHandler.openid_from_response(
@@ -1086,12 +1086,13 @@ class MicrosoftSSOHandler:
         service_principal_group_ids: Optional[List[str]] = []
         service_principal_teams: Optional[List[MicrosoftServicePrincipalTeam]] = []
         if service_principal_id:
-            service_principal_group_ids, service_principal_teams = (
-                await MicrosoftSSOHandler.get_group_ids_from_service_principal(
-                    service_principal_id=service_principal_id,
-                    async_client=async_client,
-                    access_token=access_token,
-                )
+            (
+                service_principal_group_ids,
+                service_principal_teams,
+            ) = await MicrosoftSSOHandler.get_group_ids_from_service_principal(
+                service_principal_id=service_principal_id,
+                async_client=async_client,
+                access_token=access_token,
             )
             verbose_proxy_logger.debug(
                 f"Service principal group IDs: {service_principal_group_ids}"
@@ -1103,9 +1104,9 @@ class MicrosoftSSOHandler:

         # Fetch user membership from Microsoft Graph API
         all_group_ids = []
-        next_link: Optional[str] = (
-            MicrosoftSSOHandler.graph_api_user_groups_endpoint
-        )
+        next_link: Optional[
+            str
+        ] = MicrosoftSSOHandler.graph_api_user_groups_endpoint
         auth_headers = {"Authorization": f"Bearer {access_token}"}
         page_count = 0
@@ -1304,7 +1305,7 @@ class GoogleSSOHandler:
         return (
             await google_sso.verify_and_process(
                 request=request,
-                convert_response=False,
+                convert_response=False,  # type: ignore
             )
             or {}
         )
@@ -1,6 +1,13 @@
 model_list:
-  - model_name: fake-openai-endpoint
+  - model_name: openai/*
     litellm_params:
-      model: openai/fake
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      model: openai/*
+  - model_name: anthropic/*
+    litellm_params:
+      model: anthropic/*
+  - model_name: gemini/*
+    litellm_params:
+      model: gemini/*
+
+litellm_settings:
+  drop_params: true
@@ -804,9 +804,9 @@ model_max_budget_limiter = _PROXY_VirtualKeyModelMaxBudgetLimiter(
     dual_cache=user_api_key_cache
 )
 litellm.logging_callback_manager.add_litellm_callback(model_max_budget_limiter)
-redis_usage_cache: Optional[RedisCache] = (
-    None  # redis cache used for tracking spend, tpm/rpm limits
-)
+redis_usage_cache: Optional[
+    RedisCache
+] = None  # redis cache used for tracking spend, tpm/rpm limits
 user_custom_auth = None
 user_custom_key_generate = None
 user_custom_sso = None
@@ -1132,9 +1132,9 @@ async def update_cache(  # noqa: PLR0915
            _id = "team_id:{}".format(team_id)
            try:
                # Fetch the existing cost for the given user
-                existing_spend_obj: Optional[LiteLLM_TeamTable] = (
-                    await user_api_key_cache.async_get_cache(key=_id)
-                )
+                existing_spend_obj: Optional[
+                    LiteLLM_TeamTable
+                ] = await user_api_key_cache.async_get_cache(key=_id)
                if existing_spend_obj is None:
                    # do nothing if team not in api key cache
                    return
@@ -2806,9 +2806,9 @@ async def initialize(  # noqa: PLR0915
         user_api_base = api_base
         dynamic_config[user_model]["api_base"] = api_base
     if api_version:
-        os.environ["AZURE_API_VERSION"] = (
-            api_version  # set this for azure - litellm can read this from the env
-        )
+        os.environ[
+            "AZURE_API_VERSION"
+        ] = api_version  # set this for azure - litellm can read this from the env
     if max_tokens:  # model-specific param
         dynamic_config[user_model]["max_tokens"] = max_tokens
     if temperature:  # model-specific param
@@ -6160,6 +6160,7 @@ async def model_info_v1(  # noqa: PLR0915
             proxy_model_list=proxy_model_list,
             user_model=user_model,
             infer_model_from_keys=general_settings.get("infer_model_from_keys", False),
+            llm_router=llm_router,
         )

     if len(all_models_str) > 0:
@@ -6184,6 +6185,7 @@ def _get_model_group_info(
     llm_router: Router, all_models_str: List[str], model_group: Optional[str]
 ) -> List[ModelGroupInfo]:
     model_groups: List[ModelGroupInfo] = []
+
     for model in all_models_str:
         if model_group is not None and model_group != model:
             continue
@@ -6191,6 +6193,12 @@ def _get_model_group_info(
         _model_group_info = llm_router.get_model_group_info(model_group=model)
         if _model_group_info is not None:
             model_groups.append(_model_group_info)
+        else:
+            model_group_info = ModelGroupInfo(
+                model_group=model,
+                providers=[],
+            )
+            model_groups.append(model_group_info)
     return model_groups
@@ -6387,8 +6395,8 @@ async def model_group_info(
         proxy_model_list=proxy_model_list,
         user_model=user_model,
         infer_model_from_keys=general_settings.get("infer_model_from_keys", False),
+        llm_router=llm_router,
     )

     model_groups: List[ModelGroupInfo] = _get_model_group_info(
         llm_router=llm_router, all_models_str=all_models_str, model_group=model_group
     )
@@ -6807,7 +6815,7 @@ async def login(request: Request):  # noqa: PLR0915
         master_key,
         algorithm="HS256",
     )
-    litellm_dashboard_ui += "?userID=" + user_id
+    litellm_dashboard_ui += "?login=success"
     redirect_response = RedirectResponse(url=litellm_dashboard_ui, status_code=303)
     redirect_response.set_cookie(key="token", value=jwt_token)
     return redirect_response
@@ -6883,7 +6891,7 @@ async def login(request: Request):  # noqa: PLR0915
         master_key,
         algorithm="HS256",
     )
-    litellm_dashboard_ui += "?userID=" + user_id
+    litellm_dashboard_ui += "?login=success"
     redirect_response = RedirectResponse(
         url=litellm_dashboard_ui, status_code=303
     )
@@ -7750,9 +7758,9 @@ async def get_config_list(
                 hasattr(sub_field_info, "description")
                 and sub_field_info.description is not None
             ):
-                nested_fields[idx].field_description = (
-                    sub_field_info.description
-                )
+                nested_fields[
+                    idx
+                ].field_description = sub_field_info.description
                 idx += 1

         _stored_in_db = None
@@ -1919,9 +1919,7 @@ async def view_spend_logs(  # noqa: PLR0915
     ):
         result: dict = {}
         for record in response:
-            dt_object = datetime.strptime(
-                str(record["startTime"]), "%Y-%m-%dT%H:%M:%S.%fZ"  # type: ignore
-            )  # type: ignore
+            dt_object = datetime.strptime(str(record["startTime"]), "%Y-%m-%dT%H:%M:%S.%fZ")  # type: ignore
             date = dt_object.date()
             if date not in result:
                 result[date] = {"users": {}, "models": {}}
@@ -2097,8 +2095,7 @@ async def global_spend_refresh():
     try:
         resp = await prisma_client.db.query_raw(sql_query)

-        assert resp[0]["relkind"] == "m"
-        return True
+        return resp[0]["relkind"] == "m"
     except Exception:
         return False

@@ -2396,9 +2393,21 @@ async def global_spend_keys(
         return response
     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No db connected"})
-    sql_query = f"""SELECT * FROM "Last30dKeysBySpend" LIMIT {limit};"""
+    sql_query = """SELECT * FROM "Last30dKeysBySpend";"""

-    response = await prisma_client.db.query_raw(query=sql_query)
+    if limit is None:
+        response = await prisma_client.db.query_raw(sql_query)
+        return response
+    try:
+        limit = int(limit)
+        if limit < 1:
+            raise ValueError("Limit must be greater than 0")
+        sql_query = """SELECT * FROM "Last30dKeysBySpend" LIMIT $1 ;"""
+        response = await prisma_client.db.query_raw(sql_query, limit)
+    except ValueError as e:
+        raise HTTPException(
+            status_code=422, detail={"error": f"Invalid limit: {limit}, error: {e}"}
+        ) from e

     return response

@@ -2646,9 +2655,9 @@ async def global_spend_models(
     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No db connected"})

-    sql_query = f"""SELECT * FROM "Last30dModelsBySpend" LIMIT {limit};"""
+    sql_query = """SELECT * FROM "Last30dModelsBySpend" LIMIT $1 ;"""

-    response = await prisma_client.db.query_raw(query=sql_query)
+    response = await prisma_client.db.query_raw(sql_query, int(limit))

     return response
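Illustrative note (not part of the diff): the hunks above replace f-string interpolation of `limit` into the SQL text with a positional `$1` parameter plus explicit validation. A minimal standalone sketch of that validation, with the helper name chosen here for illustration:

```python
def validate_limit(limit) -> int:
    """Coerce and validate a user-supplied LIMIT value (sketch of the logic above)."""
    value = int(limit)  # raises ValueError for non-numeric strings
    if value < 1:
        raise ValueError("Limit must be greater than 0")
    return value


# The query text then stays constant and the value travels separately, e.g.:
# sql_query = """SELECT * FROM "Last30dKeysBySpend" LIMIT $1 ;"""
# response = await prisma_client.db.query_raw(sql_query, validate_limit(limit))
```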
litellm/responses/litellm_completion_transformation/handler.py  (new file, 115 lines)
@@ -0,0 +1,115 @@
+"""
+Handler for transforming responses api requests to litellm.completion requests
+"""
+
+from typing import Any, Coroutine, Optional, Union
+
+import litellm
+from litellm.responses.litellm_completion_transformation.streaming_iterator import (
+    LiteLLMCompletionStreamingIterator,
+)
+from litellm.responses.litellm_completion_transformation.transformation import (
+    LiteLLMCompletionResponsesConfig,
+)
+from litellm.responses.streaming_iterator import BaseResponsesAPIStreamingIterator
+from litellm.types.llms.openai import (
+    ResponseInputParam,
+    ResponsesAPIOptionalRequestParams,
+    ResponsesAPIResponse,
+)
+from litellm.types.utils import ModelResponse
+
+
+class LiteLLMCompletionTransformationHandler:
+
+    def response_api_handler(
+        self,
+        model: str,
+        input: Union[str, ResponseInputParam],
+        responses_api_request: ResponsesAPIOptionalRequestParams,
+        custom_llm_provider: Optional[str] = None,
+        _is_async: bool = False,
+        stream: Optional[bool] = None,
+        **kwargs,
+    ) -> Union[
+        ResponsesAPIResponse,
+        BaseResponsesAPIStreamingIterator,
+        Coroutine[
+            Any, Any, Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]
+        ],
+    ]:
+        litellm_completion_request: dict = (
+            LiteLLMCompletionResponsesConfig.transform_responses_api_request_to_chat_completion_request(
+                model=model,
+                input=input,
+                responses_api_request=responses_api_request,
+                custom_llm_provider=custom_llm_provider,
+                stream=stream,
+                **kwargs,
+            )
+        )
+
+        if _is_async:
+            return self.async_response_api_handler(
+                litellm_completion_request=litellm_completion_request,
+                request_input=input,
+                responses_api_request=responses_api_request,
+                **kwargs,
+            )
+
+        litellm_completion_response: Union[
+            ModelResponse, litellm.CustomStreamWrapper
+        ] = litellm.completion(
+            **litellm_completion_request,
+            **kwargs,
+        )
+
+        if isinstance(litellm_completion_response, ModelResponse):
+            responses_api_response: ResponsesAPIResponse = (
+                LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
+                    chat_completion_response=litellm_completion_response,
+                    request_input=input,
+                    responses_api_request=responses_api_request,
+                )
+            )
+
+            return responses_api_response
+
+        elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
+            return LiteLLMCompletionStreamingIterator(
+                litellm_custom_stream_wrapper=litellm_completion_response,
+                request_input=input,
+                responses_api_request=responses_api_request,
+            )
+
+    async def async_response_api_handler(
+        self,
+        litellm_completion_request: dict,
+        request_input: Union[str, ResponseInputParam],
+        responses_api_request: ResponsesAPIOptionalRequestParams,
+        **kwargs,
+    ) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
+        litellm_completion_response: Union[
+            ModelResponse, litellm.CustomStreamWrapper
+        ] = await litellm.acompletion(
+            **litellm_completion_request,
+            **kwargs,
+        )
+
+        if isinstance(litellm_completion_response, ModelResponse):
+            responses_api_response: ResponsesAPIResponse = (
+                LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
+                    chat_completion_response=litellm_completion_response,
+                    request_input=request_input,
+                    responses_api_request=responses_api_request,
+                )
+            )
+
+            return responses_api_response
+
+        elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
+            return LiteLLMCompletionStreamingIterator(
+                litellm_custom_stream_wrapper=litellm_completion_response,
+                request_input=request_input,
+                responses_api_request=responses_api_request,
+            )
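Illustrative note (not part of the diff): a minimal offline sketch of what the transformation layer behind this handler produces for a Responses API request, using the config class added later in this PR. The model name and parameter values are placeholders, and the printed comment shows only the rough expected shape; no model is called.

```python
from litellm.responses.litellm_completion_transformation.transformation import (
    LiteLLMCompletionResponsesConfig,
)

# Build the chat-completion request that the handler would pass to litellm.completion.
request = LiteLLMCompletionResponsesConfig.transform_responses_api_request_to_chat_completion_request(
    model="gpt-4o-mini",  # placeholder model name
    input="Say hello in one word.",
    responses_api_request={"temperature": 0.0, "instructions": "Be brief."},
    stream=False,
)

print(request["messages"])
# Roughly: a system message built from `instructions`, followed by the user input.
```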
@@ -0,0 +1,59 @@
+"""
+Responses API has previous_response_id, which is the id of the previous response.
+
+LiteLLM needs to maintain a cache of the previous response input, output, previous_response_id, and model.
+
+This class handles that cache.
+"""
+
+from typing import List, Optional, Tuple, Union
+
+from typing_extensions import TypedDict
+
+from litellm.caching import InMemoryCache
+from litellm.types.llms.openai import ResponseInputParam, ResponsesAPIResponse
+
+RESPONSES_API_PREVIOUS_RESPONSES_CACHE = InMemoryCache()
+MAX_PREV_SESSION_INPUTS = 50
+
+
+class ResponsesAPISessionElement(TypedDict, total=False):
+    input: Union[str, ResponseInputParam]
+    output: ResponsesAPIResponse
+    response_id: str
+    previous_response_id: Optional[str]
+
+
+class SessionHandler:
+
+    def add_completed_response_to_cache(
+        self, response_id: str, session_element: ResponsesAPISessionElement
+    ):
+        RESPONSES_API_PREVIOUS_RESPONSES_CACHE.set_cache(
+            key=response_id, value=session_element
+        )
+
+    def get_chain_of_previous_input_output_pairs(
+        self, previous_response_id: str
+    ) -> List[Tuple[ResponseInputParam, ResponsesAPIResponse]]:
+        response_api_inputs: List[Tuple[ResponseInputParam, ResponsesAPIResponse]] = []
+        current_previous_response_id = previous_response_id
+
+        count_session_elements = 0
+        while current_previous_response_id:
+            if count_session_elements > MAX_PREV_SESSION_INPUTS:
+                break
+            session_element = RESPONSES_API_PREVIOUS_RESPONSES_CACHE.get_cache(
+                key=current_previous_response_id
+            )
+            if session_element:
+                response_api_inputs.append(
+                    (session_element.get("input"), session_element.get("output"))
+                )
+                current_previous_response_id = session_element.get(
+                    "previous_response_id"
+                )
+            else:
+                break
+            count_session_elements += 1
+        return response_api_inputs
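Illustrative note (not part of the diff): a small sketch of the session chain this cache enables, assuming the module layout added in this PR. The `output` values are placeholder strings standing in for real `ResponsesAPIResponse` objects so the chain walk can be shown without calling a model.

```python
from litellm.responses.litellm_completion_transformation.session_handler import (
    ResponsesAPISessionElement,
    SessionHandler,
)

handler = SessionHandler()

# Two turns of a conversation; outputs are stand-ins purely for illustration.
handler.add_completed_response_to_cache(
    "resp_1",
    ResponsesAPISessionElement(
        input="What is LiteLLM?",
        output="stub-answer-1",  # placeholder, normally a ResponsesAPIResponse
        response_id="resp_1",
        previous_response_id=None,
    ),
)
handler.add_completed_response_to_cache(
    "resp_2",
    ResponsesAPISessionElement(
        input="And what does the proxy add?",
        output="stub-answer-2",  # placeholder
        response_id="resp_2",
        previous_response_id="resp_1",
    ),
)

# Walking back from resp_2 yields (input, output) pairs in the order they are
# discovered: resp_2's pair first, then resp_1's pair.
print(handler.get_chain_of_previous_input_output_pairs("resp_2"))
```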
@@ -0,0 +1,157 @@
+from typing import List, Optional, Union
+
+import litellm
+from litellm.main import stream_chunk_builder
+from litellm.responses.litellm_completion_transformation.transformation import (
+    LiteLLMCompletionResponsesConfig,
+)
+from litellm.responses.streaming_iterator import ResponsesAPIStreamingIterator
+from litellm.types.llms.openai import (
+    OutputTextDeltaEvent,
+    ResponseCompletedEvent,
+    ResponseInputParam,
+    ResponsesAPIOptionalRequestParams,
+    ResponsesAPIStreamEvents,
+    ResponsesAPIStreamingResponse,
+)
+from litellm.types.utils import Delta as ChatCompletionDelta
+from litellm.types.utils import (
+    ModelResponse,
+    ModelResponseStream,
+    StreamingChoices,
+    TextCompletionResponse,
+)
+
+
+class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
+    """
+    Async iterator for processing streaming responses from the Responses API.
+    """
+
+    def __init__(
+        self,
+        litellm_custom_stream_wrapper: litellm.CustomStreamWrapper,
+        request_input: Union[str, ResponseInputParam],
+        responses_api_request: ResponsesAPIOptionalRequestParams,
+    ):
+        self.litellm_custom_stream_wrapper: litellm.CustomStreamWrapper = (
+            litellm_custom_stream_wrapper
+        )
+        self.request_input: Union[str, ResponseInputParam] = request_input
+        self.responses_api_request: ResponsesAPIOptionalRequestParams = (
+            responses_api_request
+        )
+        self.collected_chat_completion_chunks: List[ModelResponseStream] = []
+        self.finished: bool = False
+
+    async def __anext__(
+        self,
+    ) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]:
+        try:
+            while True:
+                if self.finished is True:
+                    raise StopAsyncIteration
+                # Get the next chunk from the stream
+                try:
+                    chunk = await self.litellm_custom_stream_wrapper.__anext__()
+                    self.collected_chat_completion_chunks.append(chunk)
+                    response_api_chunk = (
+                        self._transform_chat_completion_chunk_to_response_api_chunk(
+                            chunk
+                        )
+                    )
+                    if response_api_chunk:
+                        return response_api_chunk
+                except StopAsyncIteration:
+                    self.finished = True
+                    response_completed_event = self._emit_response_completed_event()
+                    if response_completed_event:
+                        return response_completed_event
+                    else:
+                        raise StopAsyncIteration
+
+        except Exception as e:
+            # Handle HTTP errors
+            self.finished = True
+            raise e
+
+    def __iter__(self):
+        return self
+
+    def __next__(
+        self,
+    ) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]:
+        try:
+            while True:
+                if self.finished is True:
+                    raise StopIteration
+                # Get the next chunk from the stream
+                try:
+                    chunk = self.litellm_custom_stream_wrapper.__next__()
+                    self.collected_chat_completion_chunks.append(chunk)
+                    response_api_chunk = (
+                        self._transform_chat_completion_chunk_to_response_api_chunk(
+                            chunk
+                        )
+                    )
+                    if response_api_chunk:
+                        return response_api_chunk
+                except StopIteration:
+                    self.finished = True
+                    response_completed_event = self._emit_response_completed_event()
+                    if response_completed_event:
+                        return response_completed_event
+                    else:
+                        raise StopIteration
+
+        except Exception as e:
+            # Handle HTTP errors
+            self.finished = True
+            raise e
+
+    def _transform_chat_completion_chunk_to_response_api_chunk(
+        self, chunk: ModelResponseStream
+    ) -> Optional[ResponsesAPIStreamingResponse]:
+        """
+        Transform a chat completion chunk to a response API chunk.
+
+        This currently only handles emitting the OutputTextDeltaEvent, which is used by other tools using the responses API.
+        """
+        return OutputTextDeltaEvent(
+            type=ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA,
+            item_id=chunk.id,
+            output_index=0,
+            content_index=0,
+            delta=self._get_delta_string_from_streaming_choices(chunk.choices),
+        )
+
+    def _get_delta_string_from_streaming_choices(
+        self, choices: List[StreamingChoices]
+    ) -> str:
+        """
+        Get the delta string from the streaming choices
+
+        For now this collected the first choice's delta string.
+
+        It's unclear how users expect litellm to translate multiple-choices-per-chunk to the responses API output.
+        """
+        choice = choices[0]
+        chat_completion_delta: ChatCompletionDelta = choice.delta
+        return chat_completion_delta.content or ""
+
+    def _emit_response_completed_event(self) -> Optional[ResponseCompletedEvent]:
+        litellm_model_response: Optional[
+            Union[ModelResponse, TextCompletionResponse]
+        ] = stream_chunk_builder(chunks=self.collected_chat_completion_chunks)
+        if litellm_model_response and isinstance(litellm_model_response, ModelResponse):
+
+            return ResponseCompletedEvent(
+                type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
+                response=LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
+                    request_input=self.request_input,
+                    chat_completion_response=litellm_model_response,
+                    responses_api_request=self.responses_api_request,
+                ),
+            )
+        else:
+            return None
@ -0,0 +1,664 @@
|
||||||
|
"""
|
||||||
|
Handles transforming from Responses API -> LiteLLM completion (Chat Completion API)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
|
from openai.types.responses.tool_param import FunctionToolParam
|
||||||
|
|
||||||
|
from litellm.caching import InMemoryCache
|
||||||
|
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||||
|
from litellm.responses.litellm_completion_transformation.session_handler import (
|
||||||
|
ResponsesAPISessionElement,
|
||||||
|
SessionHandler,
|
||||||
|
)
|
||||||
|
from litellm.types.llms.openai import (
|
||||||
|
AllMessageValues,
|
||||||
|
ChatCompletionResponseMessage,
|
||||||
|
ChatCompletionSystemMessage,
|
||||||
|
ChatCompletionToolCallChunk,
|
||||||
|
ChatCompletionToolCallFunctionChunk,
|
||||||
|
ChatCompletionToolMessage,
|
||||||
|
ChatCompletionToolParam,
|
||||||
|
ChatCompletionToolParamFunctionChunk,
|
||||||
|
ChatCompletionUserMessage,
|
||||||
|
GenericChatCompletionMessage,
|
||||||
|
Reasoning,
|
||||||
|
ResponseAPIUsage,
|
||||||
|
ResponseInputParam,
|
||||||
|
ResponsesAPIOptionalRequestParams,
|
||||||
|
ResponsesAPIResponse,
|
||||||
|
ResponseTextConfig,
|
||||||
|
)
|
||||||
|
from litellm.types.responses.main import (
|
||||||
|
GenericResponseOutputItem,
|
||||||
|
GenericResponseOutputItemContentAnnotation,
|
||||||
|
OutputFunctionToolCall,
|
||||||
|
OutputText,
|
||||||
|
)
|
||||||
|
from litellm.types.utils import (
|
||||||
|
ChatCompletionAnnotation,
|
||||||
|
ChatCompletionMessageToolCall,
|
||||||
|
Choices,
|
||||||
|
Function,
|
||||||
|
Message,
|
||||||
|
ModelResponse,
|
||||||
|
Usage,
|
||||||
|
)
|
||||||
|
|
||||||
|
########### Initialize Classes used for Responses API ###########
|
||||||
|
TOOL_CALLS_CACHE = InMemoryCache()
|
||||||
|
RESPONSES_API_SESSION_HANDLER = SessionHandler()
|
||||||
|
########### End of Initialize Classes used for Responses API ###########
|
||||||
|
|
||||||
|
|
||||||
|
class LiteLLMCompletionResponsesConfig:
|
||||||
|
@staticmethod
|
||||||
|
def get_supported_openai_params(model: str) -> list:
|
||||||
|
"""
|
||||||
|
LiteLLM Adapter from OpenAI Responses API to Chat Completion API supports a subset of OpenAI Responses API params
|
||||||
|
"""
|
||||||
|
return [
|
||||||
|
"input",
|
||||||
|
"model",
|
||||||
|
"instructions",
|
||||||
|
"max_output_tokens",
|
||||||
|
"metadata",
|
||||||
|
"parallel_tool_calls",
|
||||||
|
"previous_response_id",
|
||||||
|
"stream",
|
||||||
|
"temperature",
|
||||||
|
"tool_choice",
|
||||||
|
"tools",
|
||||||
|
"top_p",
|
||||||
|
"user",
|
||||||
|
]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def transform_responses_api_request_to_chat_completion_request(
|
||||||
|
model: str,
|
||||||
|
input: Union[str, ResponseInputParam],
|
||||||
|
responses_api_request: ResponsesAPIOptionalRequestParams,
|
||||||
|
custom_llm_provider: Optional[str] = None,
|
||||||
|
stream: Optional[bool] = None,
|
||||||
|
**kwargs,
|
||||||
|
) -> dict:
|
||||||
|
"""
|
||||||
|
Transform a Responses API request into a Chat Completion request
|
||||||
|
"""
|
||||||
|
litellm_completion_request: dict = {
|
||||||
|
"messages": LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
|
||||||
|
input=input,
|
||||||
|
responses_api_request=responses_api_request,
|
||||||
|
previous_response_id=responses_api_request.get("previous_response_id"),
|
||||||
|
),
|
||||||
|
"model": model,
|
||||||
|
"tool_choice": responses_api_request.get("tool_choice"),
|
||||||
|
"tools": LiteLLMCompletionResponsesConfig.transform_responses_api_tools_to_chat_completion_tools(
|
||||||
|
responses_api_request.get("tools") or [] # type: ignore
|
||||||
|
),
|
||||||
|
"top_p": responses_api_request.get("top_p"),
|
||||||
|
"user": responses_api_request.get("user"),
|
||||||
|
"temperature": responses_api_request.get("temperature"),
|
||||||
|
"parallel_tool_calls": responses_api_request.get("parallel_tool_calls"),
|
||||||
|
"max_tokens": responses_api_request.get("max_output_tokens"),
|
||||||
|
"stream": stream,
|
||||||
|
"metadata": kwargs.get("metadata"),
|
||||||
|
"service_tier": kwargs.get("service_tier"),
|
||||||
|
# litellm specific params
|
||||||
|
"custom_llm_provider": custom_llm_provider,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Responses API `Completed` events require usage, we pass `stream_options` to litellm.completion to include usage
|
||||||
|
if stream is True:
|
||||||
|
stream_options = {
|
||||||
|
"include_usage": True,
|
||||||
|
}
|
||||||
|
litellm_completion_request["stream_options"] = stream_options
|
||||||
|
litellm_logging_obj: Optional[LiteLLMLoggingObj] = kwargs.get(
|
||||||
|
"litellm_logging_obj"
|
||||||
|
)
|
||||||
|
if litellm_logging_obj:
|
||||||
|
litellm_logging_obj.stream_options = stream_options
|
||||||
|
|
||||||
|
# only pass non-None values
|
||||||
|
litellm_completion_request = {
|
||||||
|
k: v for k, v in litellm_completion_request.items() if v is not None
|
||||||
|
}
|
||||||
|
|
||||||
|
return litellm_completion_request
|
||||||
|
|
||||||
|
    @staticmethod
    def transform_responses_api_input_to_messages(
        input: Union[str, ResponseInputParam],
        responses_api_request: ResponsesAPIOptionalRequestParams,
        previous_response_id: Optional[str] = None,
    ) -> List[
        Union[
            AllMessageValues,
            GenericChatCompletionMessage,
            ChatCompletionMessageToolCall,
            ChatCompletionResponseMessage,
        ]
    ]:
        """
        Transform a Responses API input into a list of messages
        """
        messages: List[
            Union[
                AllMessageValues,
                GenericChatCompletionMessage,
                ChatCompletionMessageToolCall,
                ChatCompletionResponseMessage,
            ]
        ] = []
        if responses_api_request.get("instructions"):
            messages.append(
                LiteLLMCompletionResponsesConfig.transform_instructions_to_system_message(
                    responses_api_request.get("instructions")
                )
            )

        if previous_response_id:
            previous_response_pairs = (
                RESPONSES_API_SESSION_HANDLER.get_chain_of_previous_input_output_pairs(
                    previous_response_id=previous_response_id
                )
            )
            if previous_response_pairs:
                for previous_response_pair in previous_response_pairs:
                    chat_completion_input_messages = LiteLLMCompletionResponsesConfig._transform_response_input_param_to_chat_completion_message(
                        input=previous_response_pair[0],
                    )
                    chat_completion_output_messages = LiteLLMCompletionResponsesConfig._transform_responses_api_outputs_to_chat_completion_messages(
                        responses_api_output=previous_response_pair[1],
                    )

                    messages.extend(chat_completion_input_messages)
                    messages.extend(chat_completion_output_messages)

        messages.extend(
            LiteLLMCompletionResponsesConfig._transform_response_input_param_to_chat_completion_message(
                input=input,
            )
        )

        return messages

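A minimal sketch of the assembly order this method produces, using plain dicts and made-up turns: the system message from `instructions`, then the replayed previous turns, then the current input:

```python
# Sketch only (assumed inputs): message assembly order for the bridge.
instructions = "You are a terse assistant."
previous_turns = [
    {"role": "user", "content": "What is 2 + 2?"},
    {"role": "assistant", "content": "4"},
]
current_input = "And squared?"

messages = []
if instructions:
    messages.append({"role": "system", "content": instructions})
messages.extend(previous_turns)  # replayed from the previous_response_id chain
messages.append({"role": "user", "content": current_input})
print(messages)
```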
    @staticmethod
    def _transform_response_input_param_to_chat_completion_message(
        input: Union[str, ResponseInputParam],
    ) -> List[
        Union[
            AllMessageValues,
            GenericChatCompletionMessage,
            ChatCompletionMessageToolCall,
            ChatCompletionResponseMessage,
        ]
    ]:
        """
        Transform a ResponseInputParam into a Chat Completion message
        """
        messages: List[
            Union[
                AllMessageValues,
                GenericChatCompletionMessage,
                ChatCompletionMessageToolCall,
                ChatCompletionResponseMessage,
            ]
        ] = []
        tool_call_output_messages: List[
            Union[
                AllMessageValues,
                GenericChatCompletionMessage,
                ChatCompletionMessageToolCall,
                ChatCompletionResponseMessage,
            ]
        ] = []

        if isinstance(input, str):
            messages.append(ChatCompletionUserMessage(role="user", content=input))
        elif isinstance(input, list):
            for _input in input:
                chat_completion_messages = LiteLLMCompletionResponsesConfig._transform_responses_api_input_item_to_chat_completion_message(
                    input_item=_input
                )
                if LiteLLMCompletionResponsesConfig._is_input_item_tool_call_output(
                    input_item=_input
                ):
                    tool_call_output_messages.extend(chat_completion_messages)
                else:
                    messages.extend(chat_completion_messages)

        messages.extend(tool_call_output_messages)
        return messages

    @staticmethod
    def _ensure_tool_call_output_has_corresponding_tool_call(
        messages: List[Union[AllMessageValues, GenericChatCompletionMessage]],
    ) -> bool:
        """
        If any tool call output is present, ensure there is a corresponding tool call/tool_use block
        """
        for message in messages:
            if message.get("role") == "tool":
                return True
        return False

    @staticmethod
    def _transform_responses_api_input_item_to_chat_completion_message(
        input_item: Any,
    ) -> List[
        Union[
            AllMessageValues,
            GenericChatCompletionMessage,
            ChatCompletionResponseMessage,
        ]
    ]:
        """
        Transform a Responses API input item into a Chat Completion message

        - EasyInputMessageParam
        - Message
        - ResponseOutputMessageParam
        - ResponseFileSearchToolCallParam
        - ResponseComputerToolCallParam
        - ComputerCallOutput
        - ResponseFunctionWebSearchParam
        - ResponseFunctionToolCallParam
        - FunctionCallOutput
        - ResponseReasoningItemParam
        - ItemReference
        """
        if LiteLLMCompletionResponsesConfig._is_input_item_tool_call_output(input_item):
            # handle executed tool call results
            return LiteLLMCompletionResponsesConfig._transform_responses_api_tool_call_output_to_chat_completion_message(
                tool_call_output=input_item
            )
        else:
            return [
                GenericChatCompletionMessage(
                    role=input_item.get("role") or "user",
                    content=LiteLLMCompletionResponsesConfig._transform_responses_api_content_to_chat_completion_content(
                        input_item.get("content")
                    ),
                )
            ]

    @staticmethod
    def _is_input_item_tool_call_output(input_item: Any) -> bool:
        """
        Check if the input item is a tool call output
        """
        return input_item.get("type") in [
            "function_call_output",
            "web_search_call",
            "computer_call_output",
        ]

    @staticmethod
    def _transform_responses_api_tool_call_output_to_chat_completion_message(
        tool_call_output: Dict[str, Any],
    ) -> List[
        Union[
            AllMessageValues,
            GenericChatCompletionMessage,
            ChatCompletionResponseMessage,
        ]
    ]:
        """
        ChatCompletionToolMessage is used to indicate the output from a tool call
        """
        tool_output_message = ChatCompletionToolMessage(
            role="tool",
            content=tool_call_output.get("output") or "",
            tool_call_id=tool_call_output.get("call_id") or "",
        )

        _tool_use_definition = TOOL_CALLS_CACHE.get_cache(
            key=tool_call_output.get("call_id") or "",
        )
        if _tool_use_definition:
            """
            Append the tool use definition to the list of messages

            Providers like Anthropic require the tool use definition to be included with the tool output

            - Input:
                {'function':
                    arguments:'{"command": ["echo","<html>\\n<head>\\n <title>Hello</title>\\n</head>\\n<body>\\n <h1>Hi</h1>\\n</body>\\n</html>",">","index.html"]}',
                    name='shell',
                    'id': 'toolu_018KFWsEySHjdKZPdUzXpymJ',
                    'type': 'function'
                }
            - Output:
                {
                    "id": "toolu_018KFWsEySHjdKZPdUzXpymJ",
                    "type": "function",
                    "function": {
                        "name": "get_weather",
                        "arguments": "{\"latitude\":48.8566,\"longitude\":2.3522}"
                    }
                }

            """
            function: dict = _tool_use_definition.get("function") or {}
            tool_call_chunk = ChatCompletionToolCallChunk(
                id=_tool_use_definition.get("id") or "",
                type=_tool_use_definition.get("type") or "function",
                function=ChatCompletionToolCallFunctionChunk(
                    name=function.get("name") or "",
                    arguments=function.get("arguments") or "",
                ),
                index=0,
            )
            chat_completion_response_message = ChatCompletionResponseMessage(
                tool_calls=[tool_call_chunk],
                role="assistant",
            )
            return [chat_completion_response_message, tool_output_message]

        return [tool_output_message]

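A small sketch of the resulting message pair, with made-up ids and values, assuming the tool-call definition was found in `TOOL_CALLS_CACHE`:

```python
# Sketch only: a `function_call_output` item becomes a "tool" message, and the
# cached tool-call definition is replayed first as an assistant message
# (providers like Anthropic require the pair).
cached_tool_call = {
    "id": "call_abc123",
    "type": "function",
    "function": {"name": "get_weather", "arguments": '{"city": "Paris"}'},
}
tool_call_output = {
    "type": "function_call_output",
    "call_id": "call_abc123",
    "output": "18C",
}

messages = [
    {"role": "assistant", "tool_calls": [cached_tool_call]},
    {
        "role": "tool",
        "tool_call_id": tool_call_output["call_id"],
        "content": tool_call_output["output"],
    },
]
print(messages)
```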
    @staticmethod
    def _transform_responses_api_content_to_chat_completion_content(
        content: Any,
    ) -> Union[str, List[Union[str, Dict[str, Any]]]]:
        """
        Transform Responses API content into Chat Completion content
        """

        if isinstance(content, str):
            return content
        elif isinstance(content, list):
            content_list: List[Union[str, Dict[str, Any]]] = []
            for item in content:
                if isinstance(item, str):
                    content_list.append(item)
                elif isinstance(item, dict):
                    content_list.append(
                        {
                            "type": LiteLLMCompletionResponsesConfig._get_chat_completion_request_content_type(
                                item.get("type") or "text"
                            ),
                            "text": item.get("text"),
                        }
                    )
            return content_list
        else:
            raise ValueError(f"Invalid content type: {type(content)}")

    @staticmethod
    def _get_chat_completion_request_content_type(content_type: str) -> str:
        """
        Get the Chat Completion request content type
        """
        # Responses API content types carry an `input_` prefix; if present, remove it
        if content_type.startswith("input_"):
            return content_type[len("input_") :]
        else:
            return content_type

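A quick standalone check of the `input_` prefix rule described above (the example content types are assumptions):

```python
# Minimal check of the prefix-stripping rule, independent of LiteLLM.
def to_chat_completion_content_type(content_type: str) -> str:
    return content_type[len("input_"):] if content_type.startswith("input_") else content_type

assert to_chat_completion_content_type("input_text") == "text"
assert to_chat_completion_content_type("input_image") == "image"
assert to_chat_completion_content_type("text") == "text"
```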
    @staticmethod
    def transform_instructions_to_system_message(
        instructions: Optional[str],
    ) -> ChatCompletionSystemMessage:
        """
        Transform instructions into a system message
        """
        return ChatCompletionSystemMessage(role="system", content=instructions or "")

    @staticmethod
    def transform_responses_api_tools_to_chat_completion_tools(
        tools: Optional[List[FunctionToolParam]],
    ) -> List[ChatCompletionToolParam]:
        """
        Transform Responses API tools into Chat Completion tools
        """
        if tools is None:
            return []
        chat_completion_tools: List[ChatCompletionToolParam] = []
        for tool in tools:
            chat_completion_tools.append(
                ChatCompletionToolParam(
                    type="function",
                    function=ChatCompletionToolParamFunctionChunk(
                        name=tool["name"],
                        description=tool.get("description") or "",
                        parameters=tool.get("parameters", {}),
                        strict=tool.get("strict", False),
                    ),
                )
            )
        return chat_completion_tools

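Sketch of the shape change, using plain dicts and an assumed tool definition: Responses API tools are flat, while Chat Completions nests the same fields under `function`:

```python
# Sketch only: the tool-shape conversion performed above.
responses_tool = {
    "type": "function",
    "name": "get_weather",
    "description": "Look up the weather",
    "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
    "strict": False,
}

chat_completion_tool = {
    "type": "function",
    "function": {
        "name": responses_tool["name"],
        "description": responses_tool.get("description") or "",
        "parameters": responses_tool.get("parameters", {}),
        "strict": responses_tool.get("strict", False),
    },
}
print(chat_completion_tool)
```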
    @staticmethod
    def transform_chat_completion_tools_to_responses_tools(
        chat_completion_response: ModelResponse,
    ) -> List[OutputFunctionToolCall]:
        """
        Transform Chat Completion tool calls into Responses API tool calls
        """
        all_chat_completion_tools: List[ChatCompletionMessageToolCall] = []
        for choice in chat_completion_response.choices:
            if isinstance(choice, Choices):
                if choice.message.tool_calls:
                    all_chat_completion_tools.extend(choice.message.tool_calls)
                    for tool_call in choice.message.tool_calls:
                        TOOL_CALLS_CACHE.set_cache(
                            key=tool_call.id,
                            value=tool_call,
                        )

        responses_tools: List[OutputFunctionToolCall] = []
        for tool in all_chat_completion_tools:
            if tool.type == "function":
                function_definition = tool.function
                responses_tools.append(
                    OutputFunctionToolCall(
                        name=function_definition.name or "",
                        arguments=function_definition.get("arguments") or "",
                        call_id=tool.id or "",
                        id=tool.id or "",
                        type="function_call",  # critical: this must be "function_call" to work with tools like openai codex
                        status=function_definition.get("status") or "completed",
                    )
                )
        return responses_tools

    @staticmethod
    def transform_chat_completion_response_to_responses_api_response(
        request_input: Union[str, ResponseInputParam],
        responses_api_request: ResponsesAPIOptionalRequestParams,
        chat_completion_response: ModelResponse,
    ) -> ResponsesAPIResponse:
        """
        Transform a Chat Completion response into a Responses API response
        """
        responses_api_response: ResponsesAPIResponse = ResponsesAPIResponse(
            id=chat_completion_response.id,
            created_at=chat_completion_response.created,
            model=chat_completion_response.model,
            object=chat_completion_response.object,
            error=getattr(chat_completion_response, "error", None),
            incomplete_details=getattr(
                chat_completion_response, "incomplete_details", None
            ),
            instructions=getattr(chat_completion_response, "instructions", None),
            metadata=getattr(chat_completion_response, "metadata", {}),
            output=LiteLLMCompletionResponsesConfig._transform_chat_completion_choices_to_responses_output(
                chat_completion_response=chat_completion_response,
                choices=getattr(chat_completion_response, "choices", []),
            ),
            parallel_tool_calls=getattr(
                chat_completion_response, "parallel_tool_calls", False
            ),
            temperature=getattr(chat_completion_response, "temperature", 0),
            tool_choice=getattr(chat_completion_response, "tool_choice", "auto"),
            tools=getattr(chat_completion_response, "tools", []),
            top_p=getattr(chat_completion_response, "top_p", None),
            max_output_tokens=getattr(
                chat_completion_response, "max_output_tokens", None
            ),
            previous_response_id=getattr(
                chat_completion_response, "previous_response_id", None
            ),
            reasoning=Reasoning(),
            status=getattr(chat_completion_response, "status", "completed"),
            text=ResponseTextConfig(),
            truncation=getattr(chat_completion_response, "truncation", None),
            usage=LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
                chat_completion_response=chat_completion_response
            ),
            user=getattr(chat_completion_response, "user", None),
        )

        RESPONSES_API_SESSION_HANDLER.add_completed_response_to_cache(
            response_id=responses_api_response.id,
            session_element=ResponsesAPISessionElement(
                input=request_input,
                output=responses_api_response,
                response_id=responses_api_response.id,
                previous_response_id=responses_api_request.get("previous_response_id"),
            ),
        )
        return responses_api_response

    @staticmethod
    def _transform_chat_completion_choices_to_responses_output(
        chat_completion_response: ModelResponse,
        choices: List[Choices],
    ) -> List[Union[GenericResponseOutputItem, OutputFunctionToolCall]]:
        responses_output: List[
            Union[GenericResponseOutputItem, OutputFunctionToolCall]
        ] = []
        for choice in choices:
            responses_output.append(
                GenericResponseOutputItem(
                    type="message",
                    id=chat_completion_response.id,
                    status=choice.finish_reason,
                    role=choice.message.role,
                    content=[
                        LiteLLMCompletionResponsesConfig._transform_chat_message_to_response_output_text(
                            choice.message
                        )
                    ],
                )
            )

        tool_calls = LiteLLMCompletionResponsesConfig.transform_chat_completion_tools_to_responses_tools(
            chat_completion_response=chat_completion_response
        )
        responses_output.extend(tool_calls)
        return responses_output

    @staticmethod
    def _transform_responses_api_outputs_to_chat_completion_messages(
        responses_api_output: ResponsesAPIResponse,
    ) -> List[
        Union[
            AllMessageValues,
            GenericChatCompletionMessage,
            ChatCompletionMessageToolCall,
        ]
    ]:
        messages: List[
            Union[
                AllMessageValues,
                GenericChatCompletionMessage,
                ChatCompletionMessageToolCall,
            ]
        ] = []
        output_items = responses_api_output.output
        for _output_item in output_items:
            output_item: dict = dict(_output_item)
            if output_item.get("type") == "function_call":
                # handle function call output
                messages.append(
                    LiteLLMCompletionResponsesConfig._transform_responses_output_tool_call_to_chat_completion_output_tool_call(
                        tool_call=output_item
                    )
                )
            else:
                # transform as generic ResponseOutputItem
                messages.append(
                    GenericChatCompletionMessage(
                        role=str(output_item.get("role")) or "user",
                        content=LiteLLMCompletionResponsesConfig._transform_responses_api_content_to_chat_completion_content(
                            output_item.get("content")
                        ),
                    )
                )
        return messages

    @staticmethod
    def _transform_responses_output_tool_call_to_chat_completion_output_tool_call(
        tool_call: dict,
    ) -> ChatCompletionMessageToolCall:
        return ChatCompletionMessageToolCall(
            id=tool_call.get("id") or "",
            type="function",
            function=Function(
                name=tool_call.get("name") or "",
                arguments=tool_call.get("arguments") or "",
            ),
        )

    @staticmethod
    def _transform_chat_message_to_response_output_text(
        message: Message,
    ) -> OutputText:
        return OutputText(
            type="output_text",
            text=message.content,
            annotations=LiteLLMCompletionResponsesConfig._transform_chat_completion_annotations_to_response_output_annotations(
                annotations=getattr(message, "annotations", None)
            ),
        )

    @staticmethod
    def _transform_chat_completion_annotations_to_response_output_annotations(
        annotations: Optional[List[ChatCompletionAnnotation]],
    ) -> List[GenericResponseOutputItemContentAnnotation]:
        response_output_annotations: List[
            GenericResponseOutputItemContentAnnotation
        ] = []

        if annotations is None:
            return response_output_annotations

        for annotation in annotations:
            annotation_type = annotation.get("type")
            if annotation_type == "url_citation" and "url_citation" in annotation:
                url_citation = annotation["url_citation"]
                response_output_annotations.append(
                    GenericResponseOutputItemContentAnnotation(
                        type=annotation_type,
                        start_index=url_citation.get("start_index"),
                        end_index=url_citation.get("end_index"),
                        url=url_citation.get("url"),
                        title=url_citation.get("title"),
                    )
                )
            # Handle other annotation types here

        return response_output_annotations

    @staticmethod
    def _transform_chat_completion_usage_to_responses_usage(
        chat_completion_response: ModelResponse,
    ) -> ResponseAPIUsage:
        usage: Optional[Usage] = getattr(chat_completion_response, "usage", None)
        if usage is None:
            return ResponseAPIUsage(
                input_tokens=0,
                output_tokens=0,
                total_tokens=0,
            )
        return ResponseAPIUsage(
            input_tokens=usage.prompt_tokens,
            output_tokens=usage.completion_tokens,
            total_tokens=usage.total_tokens,
        )

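The field renaming performed by the usage transform, shown with made-up token counts:

```python
# Chat Completions usage -> Responses API usage (illustrative values only).
chat_usage = {"prompt_tokens": 12, "completion_tokens": 34, "total_tokens": 46}
responses_usage = {
    "input_tokens": chat_usage["prompt_tokens"],
    "output_tokens": chat_usage["completion_tokens"],
    "total_tokens": chat_usage["total_tokens"],
}
assert responses_usage["total_tokens"] == 46
```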
@@ -10,6 +10,9 @@ from litellm.constants import request_timeout
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
+from litellm.responses.litellm_completion_transformation.handler import (
+    LiteLLMCompletionTransformationHandler,
+)
 from litellm.responses.utils import ResponsesAPIRequestUtils
 from litellm.types.llms.openai import (
     Reasoning,
@@ -29,6 +32,7 @@ from .streaming_iterator import BaseResponsesAPIStreamingIterator
 ####### ENVIRONMENT VARIABLES ###################
 # Initialize any necessary instances or variables here
 base_llm_http_handler = BaseLLMHTTPHandler()
+litellm_completion_transformation_handler = LiteLLMCompletionTransformationHandler()
 #################################################


@@ -178,19 +182,12 @@ def responses(
     )

     # get provider config
-    responses_api_provider_config: Optional[
-        BaseResponsesAPIConfig
-    ] = ProviderConfigManager.get_provider_responses_api_config(
-        model=model,
-        provider=litellm.LlmProviders(custom_llm_provider),
-    )
-
-    if responses_api_provider_config is None:
-        raise litellm.BadRequestError(
-            model=model,
-            llm_provider=custom_llm_provider,
-            message=f"Responses API not available for custom_llm_provider={custom_llm_provider}, model: {model}",
-        )
+    responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
+        ProviderConfigManager.get_provider_responses_api_config(
+            model=model,
+            provider=litellm.LlmProviders(custom_llm_provider),
+        )
+    )

     local_vars.update(kwargs)
     # Get ResponsesAPIOptionalRequestParams with only valid parameters
@@ -200,6 +197,17 @@ def responses(
         )
     )

+    if responses_api_provider_config is None:
+        return litellm_completion_transformation_handler.response_api_handler(
+            model=model,
+            input=input,
+            responses_api_request=response_api_optional_params,
+            custom_llm_provider=custom_llm_provider,
+            _is_async=_is_async,
+            stream=stream,
+            **kwargs,
+        )
+
     # Get optional parameters for the responses API
     responses_api_request_params: Dict = (
         ResponsesAPIRequestUtils.get_optional_params_responses_api(

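A rough usage sketch of the new fallback path: when a provider has no native Responses API config, the request is bridged through chat completions instead of raising. The model name and parameter values below are placeholders:

```python
# Usage sketch, assuming provider credentials are configured in the environment.
import litellm

response = litellm.responses(
    model="anthropic/claude-3-5-sonnet-20240620",  # placeholder model
    input="Write a haiku about caching.",
    max_output_tokens=100,
)
print(response.output)  # Responses API shaped output, built from a chat completion
```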
@@ -11,7 +11,9 @@ from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
 from litellm.litellm_core_utils.thread_pool_executor import executor
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.types.llms.openai import (
+    OutputTextDeltaEvent,
     ResponseCompletedEvent,
+    ResponsesAPIResponse,
     ResponsesAPIStreamEvents,
     ResponsesAPIStreamingResponse,
 )
@@ -212,9 +214,14 @@ class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):

 class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
     """
-    mock iterator - some models like o1-pro do not support streaming, we need to fake a stream
+    Mock iterator - fake a stream by slicing the full response text into
+    5 char deltas, then emit a completed event.
+
+    Models like o1-pro don't support streaming, so we fake it.
     """

+    CHUNK_SIZE = 5
+
     def __init__(
         self,
         response: httpx.Response,
@@ -222,49 +229,68 @@ class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
         responses_api_provider_config: BaseResponsesAPIConfig,
         logging_obj: LiteLLMLoggingObj,
     ):
-        self.raw_http_response = response
         super().__init__(
             response=response,
             model=model,
            responses_api_provider_config=responses_api_provider_config,
             logging_obj=logging_obj,
         )
-        self.is_done = False
+        # one-time transform
+        transformed = (
+            self.responses_api_provider_config.transform_response_api_response(
+                model=self.model,
+                raw_response=response,
+                logging_obj=logging_obj,
+            )
+        )
+        full_text = self._collect_text(transformed)
+
+        # build a list of 5-char delta events
+        deltas = [
+            OutputTextDeltaEvent(
+                type=ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA,
+                delta=full_text[i : i + self.CHUNK_SIZE],
+                item_id=transformed.id,
+                output_index=0,
+                content_index=0,
+            )
+            for i in range(0, len(full_text), self.CHUNK_SIZE)
+        ]
+
+        # append the completed event
+        self._events = deltas + [
+            ResponseCompletedEvent(
+                type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
+                response=transformed,
+            )
+        ]
+        self._idx = 0

     def __aiter__(self):
         return self

     async def __anext__(self) -> ResponsesAPIStreamingResponse:
-        if self.is_done:
+        if self._idx >= len(self._events):
             raise StopAsyncIteration
-        self.is_done = True
-        transformed_response = (
-            self.responses_api_provider_config.transform_response_api_response(
-                model=self.model,
-                raw_response=self.raw_http_response,
-                logging_obj=self.logging_obj,
-            )
-        )
-        return ResponseCompletedEvent(
-            type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
-            response=transformed_response,
-        )
+        evt = self._events[self._idx]
+        self._idx += 1
+        return evt

     def __iter__(self):
         return self

     def __next__(self) -> ResponsesAPIStreamingResponse:
-        if self.is_done:
+        if self._idx >= len(self._events):
             raise StopIteration
-        self.is_done = True
-        transformed_response = (
-            self.responses_api_provider_config.transform_response_api_response(
-                model=self.model,
-                raw_response=self.raw_http_response,
-                logging_obj=self.logging_obj,
-            )
-        )
-        return ResponseCompletedEvent(
-            type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
-            response=transformed_response,
-        )
+        evt = self._events[self._idx]
+        self._idx += 1
+        return evt
+
+    def _collect_text(self, resp: ResponsesAPIResponse) -> str:
+        out = ""
+        for out_item in resp.output:
+            if out_item.type == "message":
+                for c in getattr(out_item, "content", []):
+                    out += c.text
+        return out

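The slicing rule used by the mock stream, in isolation (the sample text is made up):

```python
# Each slice becomes one OUTPUT_TEXT_DELTA event, followed by a single
# RESPONSE_COMPLETED event at the end.
CHUNK_SIZE = 5
full_text = "Hello, world!"
deltas = [full_text[i : i + CHUNK_SIZE] for i in range(0, len(full_text), CHUNK_SIZE)]
assert deltas == ["Hello", ", wor", "ld!"]
```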
@@ -1104,17 +1104,21 @@ class Router:
     ) -> None:
         """
         Adds default litellm params to kwargs, if set.
+
+        Handles inserting this as either "metadata" or "litellm_metadata" depending on the metadata_variable_name
         """
-        self.default_litellm_params[
-            metadata_variable_name
-        ] = self.default_litellm_params.pop("metadata", {})
-        for k, v in self.default_litellm_params.items():
-            if (
-                k not in kwargs and v is not None
-            ):  # prioritize model-specific params > default router params
-                kwargs[k] = v
-            elif k == metadata_variable_name:
-                kwargs[metadata_variable_name].update(v)
+        # 1) copy your defaults and pull out metadata
+        defaults = self.default_litellm_params.copy()
+        metadata_defaults = defaults.pop("metadata", {}) or {}
+
+        # 2) add any non-metadata defaults that aren't already in kwargs
+        for key, value in defaults.items():
+            if value is None:
+                continue
+            kwargs.setdefault(key, value)
+
+        # 3) merge in metadata, this handles inserting this as either "metadata" or "litellm_metadata"
+        kwargs.setdefault(metadata_variable_name, {}).update(metadata_defaults)

     def _handle_clientside_credential(
         self, deployment: dict, kwargs: dict

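A standalone sketch of the new precedence rule, with made-up defaults and kwargs: request-level kwargs win over router defaults, and default metadata is merged into the request metadata rather than replacing it:

```python
# Sketch of the merge semantics used in the rewritten router helper.
default_litellm_params = {"metadata": {"team": "platform"}, "num_retries": 2}
kwargs = {"num_retries": 5, "metadata": {"request_id": "abc"}}

defaults = default_litellm_params.copy()
metadata_defaults = defaults.pop("metadata", {}) or {}
for key, value in defaults.items():
    if value is not None:
        kwargs.setdefault(key, value)  # existing request kwargs are not overwritten
kwargs.setdefault("metadata", {}).update(metadata_defaults)

assert kwargs["num_retries"] == 5
assert kwargs["metadata"] == {"request_id": "abc", "team": "platform"}
```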
@@ -4979,8 +4983,12 @@ class Router:
         )

         if model_group_info is None:
-            model_group_info = ModelGroupInfo(
-                model_group=user_facing_model_group_name, providers=[llm_provider], **model_info  # type: ignore
+            model_group_info = ModelGroupInfo(  # type: ignore
+                **{
+                    "model_group": user_facing_model_group_name,
+                    "providers": [llm_provider],
+                    **model_info,
+                }
             )
         else:
             # if max_input_tokens > curr

15
litellm/types/llms/base.py
Normal file
@@ -0,0 +1,15 @@
from pydantic import BaseModel


class BaseLiteLLMOpenAIResponseObject(BaseModel):
    def __getitem__(self, key):
        return self.__dict__[key]

    def get(self, key, default=None):
        return self.__dict__.get(key, default)

    def __contains__(self, key):
        return key in self.__dict__

    def items(self):
        return self.__dict__.items()

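A usage sketch of the dict-style helpers this base class adds; `ExampleItem` is a hypothetical subclass, not part of the PR:

```python
from typing import Optional

from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject


class ExampleItem(BaseLiteLLMOpenAIResponseObject):  # hypothetical subclass
    input_tokens: Optional[int] = None


item = ExampleItem(input_tokens=12)
assert item["input_tokens"] == 12           # __getitem__
assert item.get("output_tokens", 0) == 0    # dict-style .get with default
assert "input_tokens" in item               # __contains__
```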
@@ -179,6 +179,7 @@ class ToolUseBlockStartEvent(TypedDict):

 class ContentBlockStartEvent(TypedDict, total=False):
     toolUse: Optional[ToolUseBlockStartEvent]
+    reasoningContent: BedrockConverseReasoningContentBlockDelta


 class ContentBlockDeltaEvent(TypedDict, total=False):

@@ -49,9 +49,16 @@ from openai.types.responses.response_create_params import (
     ToolChoice,
     ToolParam,
 )
+from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
 from pydantic import BaseModel, Discriminator, Field, PrivateAttr
 from typing_extensions import Annotated, Dict, Required, TypedDict, override

+from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject
+from litellm.types.responses.main import (
+    GenericResponseOutputItem,
+    OutputFunctionToolCall,
+)
+
 FileContent = Union[IO[bytes], bytes, PathLike]

 FileTypes = Union[
@@ -461,6 +468,12 @@ class ChatCompletionThinkingBlock(TypedDict, total=False):
     cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


+class ChatCompletionRedactedThinkingBlock(TypedDict, total=False):
+    type: Required[Literal["redacted_thinking"]]
+    data: str
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
 class WebSearchOptionsUserLocationApproximate(TypedDict, total=False):
     city: str
     """Free text input for the city of the user, e.g. `San Francisco`."""
@@ -638,6 +651,7 @@ class OpenAIChatCompletionAssistantMessage(TypedDict, total=False):
     name: Optional[str]
     tool_calls: Optional[List[ChatCompletionAssistantToolCall]]
     function_call: Optional[ChatCompletionToolCallFunctionChunk]
+    reasoning_content: Optional[str]


 class ChatCompletionAssistantMessage(OpenAIChatCompletionAssistantMessage, total=False):
@@ -678,6 +692,11 @@ class ChatCompletionDeveloperMessage(OpenAIChatCompletionDeveloperMessage, total
     cache_control: ChatCompletionCachedContent


+class GenericChatCompletionMessage(TypedDict, total=False):
+    role: Required[str]
+    content: Required[Union[str, List]]
+
+
 ValidUserMessageContentTypes = [
     "text",
     "image_url",
@@ -785,7 +804,9 @@ class ChatCompletionResponseMessage(TypedDict, total=False):
     function_call: Optional[ChatCompletionToolCallFunctionChunk]
     provider_specific_fields: Optional[dict]
     reasoning_content: Optional[str]
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]]
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ]


 class ChatCompletionUsageBlock(TypedDict):

@@ -872,6 +893,19 @@ OpenAIAudioTranscriptionOptionalParams = Literal[
 OpenAIImageVariationOptionalParams = Literal["n", "size", "response_format", "user"]


+class ComputerToolParam(TypedDict, total=False):
+    display_height: Required[float]
+    """The height of the computer display."""
+
+    display_width: Required[float]
+    """The width of the computer display."""
+
+    environment: Required[Union[Literal["mac", "windows", "ubuntu", "browser"], str]]
+    """The type of computer environment to control."""
+
+    type: Required[Union[Literal["computer_use_preview"], str]]
+
+
 class ResponsesAPIOptionalRequestParams(TypedDict, total=False):
     """TypedDict for Optional parameters supported by the responses API."""

@@ -887,7 +921,7 @@ class ResponsesAPIOptionalRequestParams(TypedDict, total=False):
     temperature: Optional[float]
     text: Optional[ResponseTextConfigParam]
     tool_choice: Optional[ToolChoice]
-    tools: Optional[Iterable[ToolParam]]
+    tools: Optional[List[Union[ToolParam, ComputerToolParam]]]
     top_p: Optional[float]
     truncation: Optional[Literal["auto", "disabled"]]
     user: Optional[str]
@@ -900,20 +934,6 @@ class ResponsesAPIRequestParams(ResponsesAPIOptionalRequestParams, total=False):
     model: str


-class BaseLiteLLMOpenAIResponseObject(BaseModel):
-    def __getitem__(self, key):
-        return self.__dict__[key]
-
-    def get(self, key, default=None):
-        return self.__dict__.get(key, default)
-
-    def __contains__(self, key):
-        return key in self.__dict__
-
-    def items(self):
-        return self.__dict__.items()
-
-
 class OutputTokensDetails(BaseLiteLLMOpenAIResponseObject):
     reasoning_tokens: Optional[int] = None

@@ -958,11 +978,14 @@ class ResponsesAPIResponse(BaseLiteLLMOpenAIResponseObject):
     metadata: Optional[Dict]
     model: Optional[str]
     object: Optional[str]
-    output: List[ResponseOutputItem]
+    output: Union[
+        List[ResponseOutputItem],
+        List[Union[GenericResponseOutputItem, OutputFunctionToolCall]],
+    ]
     parallel_tool_calls: bool
     temperature: Optional[float]
     tool_choice: ToolChoice
-    tools: List[Tool]
+    tools: Union[List[Tool], List[ResponseFunctionToolCall]]
     top_p: Optional[float]
     max_output_tokens: Optional[int]
     previous_response_id: Optional[str]

@@ -39,6 +39,7 @@ class PartType(TypedDict, total=False):
     file_data: FileDataType
     function_call: FunctionCall
     function_response: FunctionResponse
+    thought: bool


 class HttpxFunctionCall(TypedDict):
@@ -69,6 +70,7 @@ class HttpxPartType(TypedDict, total=False):
     functionResponse: FunctionResponse
     executableCode: HttpxExecutableCode
     codeExecutionResult: HttpxCodeExecutionResult
+    thought: bool


 class HttpxContentType(TypedDict, total=False):
@@ -166,6 +168,11 @@ class SafetSettingsConfig(TypedDict, total=False):
     method: HarmBlockMethod


+class GeminiThinkingConfig(TypedDict, total=False):
+    includeThoughts: bool
+    thinkingBudget: int
+
+
 class GenerationConfig(TypedDict, total=False):
     temperature: float
     top_p: float
@@ -181,6 +188,7 @@ class GenerationConfig(TypedDict, total=False):
     responseLogprobs: bool
     logprobs: int
     responseModalities: List[Literal["TEXT", "IMAGE", "AUDIO", "VIDEO"]]
+    thinkingConfig: GeminiThinkingConfig


 class Tools(TypedDict, total=False):
@@ -212,6 +220,7 @@ class UsageMetadata(TypedDict, total=False):
     candidatesTokenCount: int
     cachedContentTokenCount: int
     promptTokensDetails: List[PromptTokensDetails]
+    thoughtsTokenCount: int


 class CachedContent(TypedDict, total=False):

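Sketch of how the new Gemini thinking fields compose at runtime (TypedDicts are plain dicts; the budget value and the `promptTokenCount` field below are assumptions for illustration):

```python
# Illustrative request config and usage metadata shapes.
generation_config = {
    "temperature": 0.7,
    "thinkingConfig": {
        "includeThoughts": True,
        "thinkingBudget": 1024,
    },
}
# usage metadata can now carry thoughtsTokenCount alongside the existing counts
usage_metadata = {
    "promptTokenCount": 20,        # assumed field name for the prompt-side count
    "candidatesTokenCount": 80,
    "thoughtsTokenCount": 40,
}
print(generation_config, usage_metadata)
```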
@@ -39,6 +39,7 @@ class KeyMetadata(BaseModel):
     """Metadata for a key"""

     key_alias: Optional[str] = None
+    team_id: Optional[str] = None


 class KeyMetricWithMetadata(MetricBase):

48
litellm/types/responses/main.py
Normal file
@@ -0,0 +1,48 @@
from typing import Literal

from typing_extensions import Any, List, Optional, TypedDict

from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject


class GenericResponseOutputItemContentAnnotation(BaseLiteLLMOpenAIResponseObject):
    """Annotation for content in a message"""

    type: Optional[str]
    start_index: Optional[int]
    end_index: Optional[int]
    url: Optional[str]
    title: Optional[str]
    pass


class OutputText(BaseLiteLLMOpenAIResponseObject):
    """Text output content from an assistant message"""

    type: Optional[str]  # "output_text"
    text: Optional[str]
    annotations: Optional[List[GenericResponseOutputItemContentAnnotation]]


class OutputFunctionToolCall(BaseLiteLLMOpenAIResponseObject):
    """A tool call to run a function"""

    arguments: Optional[str]
    call_id: Optional[str]
    name: Optional[str]
    type: Optional[str]  # "function_call"
    id: Optional[str]
    status: Literal["in_progress", "completed", "incomplete"]


class GenericResponseOutputItem(BaseLiteLLMOpenAIResponseObject):
    """
    Generic response API output item
    """

    type: str  # "message"
    id: str
    status: str  # "completed", "in_progress", etc.
    role: str  # "assistant", "user", etc.
    content: List[OutputText]

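A construction sketch for the new output item types, with placeholder ids and text:

```python
from litellm.types.responses.main import GenericResponseOutputItem, OutputText

item = GenericResponseOutputItem(
    type="message",
    id="chatcmpl-123",          # placeholder id
    status="stop",
    role="assistant",
    content=[OutputText(type="output_text", text="Hello!", annotations=[])],
)
print(item.content[0].text)  # "Hello!"
```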
@@ -1,3 +1,4 @@
+from datetime import datetime
 from typing import Dict, List, Optional

 from pydantic import BaseModel
@@ -30,3 +31,23 @@ class TagDeleteRequest(BaseModel):

 class TagInfoRequest(BaseModel):
     names: List[str]
+
+
+class LiteLLM_DailyTagSpendTable(BaseModel):
+    id: str
+    tag: str
+    date: str
+    api_key: str
+    model: str
+    model_group: Optional[str]
+    custom_llm_provider: Optional[str]
+    prompt_tokens: int
+    completion_tokens: int
+    cache_read_input_tokens: int
+    cache_creation_input_tokens: int
+    spend: float
+    api_requests: int
+    successful_requests: int
+    failed_requests: int
+    created_at: datetime
+    updated_at: datetime

@@ -29,6 +29,7 @@ from .guardrails import GuardrailEventHooks
 from .llms.openai import (
     Batch,
     ChatCompletionAnnotation,
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,
     ChatCompletionUsageBlock,
@@ -150,6 +151,7 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
     ]  # only for vertex ai models
     output_cost_per_image: Optional[float]
     output_vector_size: Optional[int]
+    output_cost_per_reasoning_token: Optional[float]
     output_cost_per_video_per_second: Optional[float]  # only for vertex ai models
     output_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
     output_cost_per_second: Optional[float]  # for OpenAI Speech models
@@ -377,12 +379,18 @@ class Function(OpenAIObject):

     def __init__(
         self,
-        arguments: Optional[Union[Dict, str]],
+        arguments: Optional[Union[Dict, str]] = None,
         name: Optional[str] = None,
         **params,
     ):
         if arguments is None:
-            arguments = ""
+            if params.get("parameters", None) is not None and isinstance(
+                params["parameters"], dict
+            ):
+                arguments = json.dumps(params["parameters"])
+                params.pop("parameters")
+            else:
+                arguments = ""
         elif isinstance(arguments, Dict):
             arguments = json.dumps(arguments)
         else:
@@ -391,7 +399,7 @@ class Function(OpenAIObject):
             name = name

         # Build a dictionary with the structure your BaseModel expects
-        data = {"arguments": arguments, "name": name, **params}
+        data = {"arguments": arguments, "name": name}

         super(Function, self).__init__(**data)

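A standalone sketch of the new `parameters` fallback in `Function.__init__` (not the class itself; the values are made up): a legacy `parameters` dict is serialized into `arguments`, and extra params are no longer splatted into the model data:

```python
import json


def build_function_data(arguments=None, name=None, **params):
    # mirrors the new fallback logic shown in the hunk above
    if arguments is None:
        if isinstance(params.get("parameters"), dict):
            arguments = json.dumps(params.pop("parameters"))
        else:
            arguments = ""
    elif isinstance(arguments, dict):
        arguments = json.dumps(arguments)
    return {"arguments": arguments, "name": name}


assert build_function_data(name="get_weather", parameters={"city": "Paris"}) == {
    "arguments": '{"city": "Paris"}',
    "name": "get_weather",
}
```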
@@ -545,7 +553,9 @@ class Message(OpenAIObject):
     function_call: Optional[FunctionCall]
     audio: Optional[ChatCompletionAudioResponse] = None
     reasoning_content: Optional[str] = None
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ] = None
     provider_specific_fields: Optional[Dict[str, Any]] = Field(
         default=None, exclude=True
     )
@@ -560,7 +570,11 @@ class Message(OpenAIObject):
         audio: Optional[ChatCompletionAudioResponse] = None,
         provider_specific_fields: Optional[Dict[str, Any]] = None,
         reasoning_content: Optional[str] = None,
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None,
         annotations: Optional[List[ChatCompletionAnnotation]] = None,
         **params,
     ):
@@ -643,7 +657,9 @@ class Message(OpenAIObject):

 class Delta(OpenAIObject):
     reasoning_content: Optional[str] = None
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ] = None
     provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)

     def __init__(
@@ -654,7 +670,11 @@ class Delta(OpenAIObject):
         tool_calls=None,
         audio: Optional[ChatCompletionAudioResponse] = None,
         reasoning_content: Optional[str] = None,
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None,
         annotations: Optional[List[ChatCompletionAnnotation]] = None,
         **params,
     ):
@@ -829,8 +849,11 @@ class Usage(CompletionUsage):
         # handle reasoning_tokens
         _completion_tokens_details: Optional[CompletionTokensDetailsWrapper] = None
         if reasoning_tokens:
+            text_tokens = (
+                completion_tokens - reasoning_tokens if completion_tokens else None
+            )
             completion_tokens_details = CompletionTokensDetailsWrapper(
-                reasoning_tokens=reasoning_tokens
+                reasoning_tokens=reasoning_tokens, text_tokens=text_tokens
            )

         # Ensure completion_tokens_details is properly handled
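The added arithmetic in isolation, with made-up counts:

```python
# text_tokens is simply the non-reasoning remainder of the completion tokens.
completion_tokens = 120
reasoning_tokens = 45
text_tokens = completion_tokens - reasoning_tokens if completion_tokens else None
assert text_tokens == 75  # reported alongside reasoning_tokens in completion_tokens_details
```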